1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Colligator\Search; |
4
|
|
|
|
5
|
|
|
use Colligator\Collection; |
6
|
|
|
use Colligator\Document; |
7
|
|
|
use Colligator\Exceptions\InvalidQueryException; |
8
|
|
|
use Colligator\Http\Requests\SearchDocumentsRequest; |
9
|
|
|
use Elasticsearch\Client; |
10
|
|
|
use Elasticsearch\Common\Exceptions\BadRequest400Exception; |
11
|
|
|
use Elasticsearch\Common\Exceptions\Missing404Exception; |
12
|
|
|
|
13
|
|
|
/**
 * Thin wrapper around the ElasticSearch client for the documents index.
 *
 * Covers searching and fetching documents, (re)indexing them, keeping a
 * per-request cache of how many documents each subject/genre is used on, and
 * managing versioned indices ("<esIndex>_v<N>") behind the "<esIndex>" alias.
 */
class DocumentsIndex
{
    /**
     * Index name used in request payloads, and the alias name that versioned
     * indices ("<esIndex>_v<N>") are attached to.
     *
     * @var string
     */
    public $esIndex = 'documents';

    /**
     * Document type used in request payloads.
     *
     * @var string
     */
    public $esType = 'document';

    /**
     * @var Client
     */
    public $client;

    /**
     * Usage counts, written with array_set() under the key "<type>.<id>".
     *
     * @var array
     */
    public $usage = [];

    /**
     * Short entity type name => fully qualified model class name, as stored
     * in the `entity_type` column of the `entities` table.
     *
     * @var array
     */
    protected $entityTypes = [
        'subject' => 'Colligator\\Subject',
        'genre'   => 'Colligator\\Genre',
    ];

    /**
     * @param Client $client
     */
    public function __construct(Client $client)
    {
        $this->client = $client;
        $this->esIndex = env('ES_INDEX', 'documents');
    }

    /**
     * Search for documents in ElasticSearch.
     *
     * Paging defaults to offset 0 and 25 hits when the request does not say
     * otherwise. The offset that was used is added to the response under the
     * key `offset`.
     *
     * @param SearchDocumentsRequest $request
     *
     * @throws InvalidQueryException if ElasticSearch rejects the query
     *
     * @return array
     */
    public function search(SearchDocumentsRequest $request)
    {
        $payload = $this->basePayload();
        $payload['from'] = $request->offset ?: 0;
        $payload['size'] = $request->limit ?: 25;

        $query = $this->queryStringFromRequest($request);
        if (!empty($query)) {
            $payload['body']['query']['query_string']['query'] = $query;
        }

        if ($request->has('sort')) {
            $payload['body']['sort'][$request->sort]['order'] = $request->get('order', 'asc');
        }

        try {
            $response = $this->client->search($payload);
        } catch (BadRequest400Exception $e) {
            throw new InvalidQueryException($this->errorReasonFromMessage($e->getMessage()));
        }
        $response['offset'] = $payload['from'];

        return $response;
    }

    /**
     * Pull a human readable reason out of an ElasticSearch error message.
     *
     * Handles a JSON body where `error` is an object carrying
     * `root_cause[0].reason`, and a JSON body where `error` is a plain string.
     * Anything else (non-JSON, unexpected structure) yields the raw message,
     * so the caller always gets a non-empty string to report.
     *
     * @param string $rawMessage
     *
     * @return string
     */
    protected function errorReasonFromMessage($rawMessage)
    {
        $decoded = json_decode($rawMessage, true);
        $error = (is_array($decoded) && isset($decoded['error'])) ? $decoded['error'] : null;

        if (is_array($error)
            && isset($error['root_cause'][0]['reason'])
            && is_string($error['root_cause'][0]['reason'])
            && $error['root_cause'][0]['reason'] !== ''
        ) {
            return $error['root_cause'][0]['reason'];
        }

        if (is_string($error) && $error !== '') {
            return $error;
        }

        return $rawMessage;
    }

    /**
     * Return a single document identified by ID.
     *
     * @param int $id
     *
     * @return array|null The document's `_source`, or null if ElasticSearch
     *                    reports the document as missing.
     */
    public function get($id)
    {
        $payload = $this->basePayload();
        $payload['id'] = $id;

        try {
            $response = $this->client->get($payload);
        } catch (Missing404Exception $e) {
            return null;
        }

        return $response['_source'];
    }

    /**
     * Escape characters that have a special meaning in the query string syntax
     * by prefixing each of them with a backslash.
     *
     * The double quote is escaped as well, since callers in this class embed
     * the result inside a double-quoted phrase. The operator words AND, OR and
     * NOT are not escaped by this method.
     *
     * @param string $value
     *
     * @return string
     */
    public function sanitizeForQuery($value)
    {
        // preg_quote() makes every character safe to use inside the character class.
        $chars = preg_quote('\\+-&|!(){}[]^~*?:"/', '/');

        // Replacement is a literal backslash followed by the matched character.
        return preg_replace('/([' . $chars . '])/', '\\\\$1', $value);
    }

    /**
     * Builds a query string query from a SearchDocumentsRequest.
     *
     * Each recognised request parameter contributes one clause, and the
     * clauses are joined with AND. An empty string is returned when the
     * request carries none of the recognised parameters.
     *
     * @param SearchDocumentsRequest $request
     *
     * @throws InvalidQueryException if the removed `real` parameter is used
     *
     * @return string
     */
    public function queryStringFromRequest(SearchDocumentsRequest $request)
    {
        if ($request->has('real')) {
            throw new InvalidQueryException('`real` is (very) deprecated, please use `subject` instead.');
        }

        $clauses = [];

        if ($request->has('q')) {
            // Allow raw queries: passed through without escaping.
            $clauses[] = $request->q;
        }

        if ($request->has('collection')) {
            $col = Collection::findOrFail($request->collection);
            $clauses[] = 'collections:"' . $this->sanitizeForQuery($col->name) . '"';
        }

        if ($request->has('subject')) {
            $subject = $this->sanitizeForQuery($request->subject);
            $clauses[] = '(subjects.noubomn.prefLabel:"' . $subject . '"' .
                ' OR subjects.bare.prefLabel:"' . $subject . '"' .
                ' OR genres.noubomn.prefLabel:"' . $subject . '")';
            // TODO: We should probably distinguish between X as a subject and X as a form/genre.
            //       But then the frontend has to say which one it wants, which it does not do yet.
        }

        if ($request->has('language')) {
            $clauses[] = 'language:"' . $this->sanitizeForQuery($request->language) . '"';
        }

        if ($request->has('genre')) {
            $clauses[] = 'genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->genre) . '"';
        }

        // implode() on an empty list yields '', which is the "no query" result.
        return implode(' AND ', $clauses);
    }

    /**
     * Payload skeleton (index and type) shared by the client calls.
     *
     * @return array
     */
    public function basePayload()
    {
        return [
            'index' => $this->esIndex,
            'type'  => $this->esType,
        ];
    }

    /**
     * Map a short entity type name ('subject', 'genre') to its model class name.
     *
     * @param string $type
     *
     * @throws \InvalidArgumentException if the type is not known
     *
     * @return string
     */
    public function getFullType($type)
    {
        if (!isset($this->entityTypes[$type])) {
            throw new \InvalidArgumentException(
                'Unknown entity type: ' . (is_scalar($type) ? $type : gettype($type))
            );
        }

        return $this->entityTypes[$type];
    }

    /**
     * Returns the number of documents the entity is used on.
     *
     * The count is looked up in the usage cache and fetched from the database
     * first if it is not cached yet.
     *
     * @param int    $id
     * @param string $type Short entity type name, see getFullType()
     *
     * @throws \InvalidArgumentException if the type is not known
     *
     * @return int
     */
    public function getUsageCount($id, $type)
    {
        // Called for validation only: throws on an unknown type.
        $this->getFullType($type);

        $arg = $type . '.' . $id;
        if (is_null(array_get($this->usage, $arg))) {
            $this->addToUsageCache($id, $type);
        }

        return array_get($this->usage, $arg);
    }

    /**
     * Fetch document usage counts for the given entities and store them in
     * the usage cache. Entities with no rows in the `entities` table get 0.
     *
     * @param array|int $entity_ids
     * @param string    $type       Short entity type name, see getFullType()
     *
     * @throws \InvalidArgumentException if the type is not known
     */
    public function addToUsageCache($entity_ids, $type)
    {
        $fullType = $this->getFullType($type);
        if (!is_array($entity_ids)) {
            $entity_ids = [$entity_ids];
        }
        if (!count($entity_ids)) {
            // Nothing to look up; skip the query.
            return;
        }

        $res = \DB::table('entities')
            ->select(['entity_id', \DB::raw('count(document_id) as doc_count')])
            ->whereIn('entity_id', $entity_ids)
            ->where('entity_type', $fullType)
            ->groupBy('entity_id')
            ->get();

        // Default every requested id to zero, since unused entities produce no row.
        foreach ($entity_ids as $sid) {
            array_set($this->usage, $type . '.' . $sid, 0);
        }

        foreach ($res as $row) {
            array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count));
        }
    }

    /**
     * Fill the usage cache with counts for every entity in the `entities`
     * table, reading the table in chunks of 5000 grouped rows.
     *
     * Rows whose `entity_type` is not one of the known types are skipped.
     */
    public function buildCompleteUsageCache()
    {
        $typemap = array_flip($this->entityTypes);

        \DB::table('entities')
            ->select(['entity_id', 'entity_type', \DB::raw('count(document_id) as doc_count')])
            ->groupBy('entity_id', 'entity_type')
            // Chunking pages through the result, so it needs a stable order.
            ->orderBy('entity_type')
            ->orderBy('entity_id')
            ->chunk(5000, function ($rows) use ($typemap) {
                foreach ($rows as $row) {
                    if (!isset($typemap[$row->entity_type])) {
                        continue;
                    }
                    $type = $typemap[$row->entity_type];
                    array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count));
                }
            });
    }

    /**
     * Add or update a document in the ElasticSearch index, making it searchable.
     *
     * @param Document $doc
     * @param int|null $indexVersion If given, the document is written to
     *                               "<esIndex>_v<indexVersion>" instead of "<esIndex>"
     *
     * @throws \ErrorException if ElasticSearch rejects the request
     */
    public function index(Document $doc, $indexVersion = null)
    {
        $payload = $this->basePayload();
        if (!is_null($indexVersion)) {
            $payload['index'] = $this->esIndex . '_v' . $indexVersion;
        }
        $payload['id'] = $doc->id;

        $sdoc = new SearchableDocument($doc, $this);
        $payload['body'] = $sdoc->toArray();

        try {
            $this->client->index($payload);
        } catch (BadRequest400Exception $e) {
            \Log::error('ElasticSearch returned error: ' . $e->getMessage() . '. Our request: ' . var_export($payload, true));

            // Same message, code and severity as before; the original exception is chained as "previous".
            throw new \ErrorException(
                'ElasticSearch failed to index the document ' . $doc->id . '. Please see the log for payload and full error response. Error message: ' . $e->getMessage(),
                0,
                E_ERROR,
                __FILE__,
                __LINE__,
                $e
            );
        }
    }

    /**
     * Load a document (with its `subjects` and `cover` relations) by ID and
     * add or update it in the ElasticSearch index.
     *
     * @param int $docId
     *
     * @throws \ErrorException if ElasticSearch rejects the request
     */
    public function indexById($docId)
    {
        $this->index(Document::with('subjects', 'cover')->findOrFail($docId));
    }

    /**
     * Create the index "<esIndex>_v<version>" with analysis settings and mappings.
     *
     * @param int|null $version Version to create; defaults to the current version + 1
     *
     * @return int The version that was created
     */
    public function createVersion($version = null)
    {
        if (is_null($version)) {
            $version = $this->getCurrentVersion() + 1;
        }

        $indexParams = ['index' => $this->esIndex . '_v' . $version];

        // Strip hyphens from ISBNs before they are tokenized.
        $indexParams['body']['settings']['analysis']['char_filter']['isbn_filter'] = [
            'type'        => 'pattern_replace',
            'pattern'     => '-',
            'replacement' => '',
        ];
        $indexParams['body']['settings']['analysis']['analyzer']['isbn_analyzer'] = [
            'type'        => 'custom',
            'char_filter' => ['isbn_filter'],
            'tokenizer'   => 'keyword',
            'filter'      => ['lowercase'],
        ];
        $indexParams['body']['mappings']['document'] = [
            '_source' => [
                'enabled' => true,
            ],
            'properties' => [
                'id'        => ['type' => 'integer'],
                'created'   => ['type' => 'date'],
                'modified'  => ['type' => 'date'],
                'bibsys_id' => ['type' => 'string', 'index' => 'not_analyzed'],
                'isbns'     => [
                    'type'     => 'string',
                    'analyzer' => 'isbn_analyzer',
                ],
                'holdings' => [
                    'properties' => [
                        'created'  => ['type' => 'date'],
                        'acquired' => ['type' => 'date'],
                    ],
                ],
                'cover' => [
                    'properties' => [
                        'created'  => ['type' => 'date'],
                        'modified' => ['type' => 'date'],
                    ],
                ],
            ],
        ];
        $this->client->indices()->create($indexParams);

        return $version;
    }

    /**
     * Delete the index "<esIndex>_v<version>". A missing index is not an error.
     *
     * @param int $version
     */
    public function dropVersion($version)
    {
        try {
            $this->client->indices()->delete([
                'index' => $this->esIndex . '_v' . $version,
            ]);
        } catch (Missing404Exception $e) {
            // Didn't exist in the beginning, that's ok.
        }
    }

    /**
     * Append an alias action for "<esIndex>_v<version>" to the action list.
     * Nothing is appended when the version is falsy (e.g. 0, meaning "none").
     *
     * @param array  $actions Action list, modified in place
     * @param string $action  Action name, e.g. 'add' or 'remove'
     * @param int    $version
     */
    public function addAction(&$actions, $action, $version)
    {
        if ($version) {
            $actions[] = [$action => ['index' => $this->esIndex . '_v' . $version, 'alias' => $this->esIndex]];
        }
    }

    /**
     * Point the "<esIndex>" alias at the given version, removing it from the
     * currently active version (if any) in the same updateAliases request.
     *
     * @param int $newVersion
     */
    public function activateVersion($newVersion)
    {
        $oldVersion = $this->getCurrentVersion();
        $actions = [];
        $this->addAction($actions, 'remove', $oldVersion);
        $this->addAction($actions, 'add', $newVersion);
        if (count($actions)) {
            $this->client->indices()->updateAliases(['body' => ['actions' => $actions]]);
        }
    }

    /**
     * Check whether the index "<esIndex>_v<version>" exists.
     *
     * @param int $version
     *
     * @return bool
     */
    public function versionExists($version)
    {
        return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]);
    }

    /**
     * Find the version number of the index the "<esIndex>" alias is attached to.
     *
     * If several indices carry the alias, the last one listed by the client wins.
     *
     * @throws \UnexpectedValueException if the aliased index is not named "<esIndex>_v<N>"
     *
     * @return int The version number, or 0 if no index carries the alias
     */
    public function getCurrentVersion()
    {
        $currentIndex = null;
        foreach ($this->client->indices()->getAliases() as $index => $data) {
            if (isset($data['aliases']) && array_key_exists($this->esIndex, $data['aliases'])) {
                $currentIndex = $index;
            }
        }

        if (is_null($currentIndex)) {
            return 0;
        }

        // Strip the known prefix instead of splitting on '_v', which breaks
        // when the alias name itself contains '_v'.
        $prefix = $this->esIndex . '_v';
        if (strpos($currentIndex, $prefix) !== 0) {
            throw new \UnexpectedValueException(
                'Alias "' . $this->esIndex . '" points to index "' . $currentIndex
                . '", which is not named "' . $prefix . '<N>".'
            );
        }

        return intval(substr($currentIndex, strlen($prefix)));
    }
}
371
|
|
|
|
An attempt to access an undefined property has been detected. This may be either a typographical error, or the property has been renamed but there are still references to its old name.
If you really want to allow access to undefined properties, you can define magic methods to allow it. See the PHP core documentation on Overloading.