DocumentsIndex::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 5
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
namespace Colligator\Search;
4
5
use Colligator\Collection;
6
use Colligator\Document;
7
use Colligator\Exceptions\InvalidQueryException;
8
use Colligator\Http\Requests\SearchDocumentsRequest;
9
use Elasticsearch\Client;
10
use Elasticsearch\Common\Exceptions\BadRequest400Exception;
11
use Elasticsearch\Common\Exceptions\Missing404Exception;
12
13
class DocumentsIndex
14
{
15
    public $esIndex = 'documents';
16
    public $esType = 'document';
17
18
    /**
19
     * @var Client
20
     */
21
    public $client;
22
23
    /**
24
     * @var array
25
     */
26
    public $usage = [];
27
28
    /**
     * Create an index wrapper around the given Elasticsearch client.
     *
     * The index name is read from the ES_INDEX environment variable, with
     * 'documents' passed to env() as the default value.
     *
     * @param Client $client Client used for every request made by this class.
     */
    public function __construct(Client $client)
    {
        $indexName = env('ES_INDEX', 'documents');

        $this->esIndex = $indexName;
        $this->client = $client;
    }
36
37
    /**
     * Search for documents in ElasticSearch.
     *
     * Paging comes from the request's `offset` and `limit` (0 and 25 when
     * they are empty), the query from queryStringFromRequest(), and the
     * optional sort field from `sort` / `order` (order defaults to 'asc').
     *
     * @param SearchDocumentsRequest $request
     *
     * @throws InvalidQueryException if ElasticSearch rejects the request with a 400 response
     *
     * @return array The ElasticSearch response with an extra 'offset' key.
     */
    public function search(SearchDocumentsRequest $request)
    {
        $payload = $this->basePayload();
        $payload['from'] = $request->offset ?: 0;
        $payload['size'] = $request->limit ?: 25;

        $query = $this->queryStringFromRequest($request);
        if (!empty($query)) {
            $payload['body']['query']['query_string']['query'] = $query;
        }

        if ($request->has('sort')) {
            $payload['body']['sort'][$request->sort]['order'] = $request->get('order', 'asc');
        }

        try {
            $response = $this->client->search($payload);
        } catch (BadRequest400Exception $e) {
            // The exception message is expected to hold the JSON error body,
            // but it may just as well be plain text.
            $error = json_decode($e->getMessage(), true);

            $msg = null;
            if (is_array($error)) {
                $msg = array_get($error, 'error.root_cause.0.reason') ?: array_get($error, 'error');
            }
            if (is_array($msg)) {
                // 'error' was a structured object without a root cause reason.
                $msg = json_encode($msg);
            }
            if (!is_string($msg) || $msg === '') {
                // Never hand null or an array to the exception constructor.
                $msg = $e->getMessage();
            }

            throw new InvalidQueryException($msg);
        }
        $response['offset'] = $payload['from'];

        return $response;
    }
70
71
    /**
     * Fetch the stored source of a single document by its ID.
     *
     * @param int $id
     *
     * @return array|null The document's `_source`, or null when the client
     *                    reports the document as missing (404).
     */
    public function get($id)
    {
        $payload = array_merge($this->basePayload(), ['id' => $id]);

        try {
            $hit = $this->client->get($payload);
        } catch (Missing404Exception $e) {
            return null;
        }

        return $hit['_source'];
    }
91
92
    /**
     * Escape characters that are reserved in the query string syntax by
     * prefixing each of them with a backslash.
     *
     * Escaped characters: \ + - = & | ! ( ) { } [ ] ^ " ~ * ? : /
     *
     * The double quote is included because callers in this class embed the
     * result inside a quoted phrase; an unescaped quote in the value would
     * terminate that phrase early.
     *
     * NOTE(review): the operator words AND / OR / NOT are not escaped here.
     * That looks harmless while the result is only used inside quoted
     * phrases - confirm before using it for unquoted terms.
     *
     * @param string $value
     *
     * @return string
     */
    public function sanitizeForQuery($value)
    {
        // Passing the delimiter to preg_quote also makes the '/' safe to use
        // inside the pattern below.
        $chars = preg_quote('\\+-=&|!(){}[]^"~*?:/', '/');

        // Replacement is a literal backslash followed by the matched character.
        return preg_replace('/([' . $chars . '])/', '\\\\$1', $value);
    }
118
119
    /**
     * Builds a query string query from a SearchDocumentsRequest.
     *
     * Each supported request parameter (`q`, `collection`, `subject`,
     * `language`, `genre`) contributes one clause; the clauses are joined
     * with ' AND '. An empty string is returned when no parameter is set.
     *
     * NOTE(review): the raw `q` clause is not parenthesised, so an `OR`
     * inside it may bind differently than intended once other clauses are
     * appended - confirm whether it should be wrapped.
     *
     * @param SearchDocumentsRequest $request
     *
     * @throws InvalidQueryException if the removed `real` parameter is used
     *
     * @return string
     */
    public function queryStringFromRequest(SearchDocumentsRequest $request)
    {
        $query = [];
        if ($request->has('q')) {
            // Allow raw queries: passed through unescaped on purpose.
            $query[] = $request->q;
        }
        if ($request->has('collection')) {
            $col = Collection::findOrFail($request->collection);
            $query[] = 'collections:"' . $this->sanitizeForQuery($col->name) . '"';
        }
        if ($request->has('subject')) {
            $subject = $this->sanitizeForQuery($request->subject);
            $query[] = '(subjects.noubomn.prefLabel:"' . $subject . '"' .
                    ' OR subjects.bare.prefLabel:"' . $subject . '"' .
                    ' OR genres.noubomn.prefLabel:"' . $subject . '")';
                // TODO: We should probably distinguish between X as a subject and X as a form/genre.
                //       But then the frontend has to tell us which one it wants, which it does not do yet.
        }
        if ($request->has('language')) {
            $query[] = 'language:"' . $this->sanitizeForQuery($request->language) . '"';
        }
        if ($request->has('genre')) {
            $query[] = 'genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->genre) . '"';
        }
        if ($request->has('real')) {
            // Report the problem as an invalid query instead of dumping and
            // terminating the process with the dd() debug helper.
            throw new InvalidQueryException('`real` is (very) deprecated, please use `subject` instead.');
        }

        // implode() on an empty array already yields ''.
        return implode(' AND ', $query);
    }
157
158
    /**
     * Build the request parameters shared by calls against the document index.
     *
     * @return array Array with the 'index' and 'type' keys, taken from
     *               $esIndex and $esType.
     */
    public function basePayload()
    {
        $payload = [];
        $payload['index'] = $this->esIndex;
        $payload['type'] = $this->esType;

        return $payload;
    }
165
166
    /**
     * Translate a short entity type name into its fully qualified class name.
     *
     * @param string $type Either 'subject' or 'genre'.
     *
     * @throws \InvalidArgumentException if the type is not one of the known names
     *
     * @return string
     */
    public function getFullType($type)
    {
        $typemap = ['subject' => 'Colligator\\Subject', 'genre' => 'Colligator\\Genre'];

        // The is_string() guard keeps arrays/objects from reaching isset(),
        // where they would be an illegal offset.
        if (is_string($type) && isset($typemap[$type])) {
            return $typemap[$type];
        }

        throw new \InvalidArgumentException(
            'Unknown entity type ' . var_export($type, true) .
            ', expected one of: ' . implode(', ', array_keys($typemap))
        );
    }
175
176
    /**
     * Returns the number of documents an entity is used on.
     *
     * The count is served from the $usage cache; on a cache miss the cache
     * is filled for this entity first.
     *
     * @param int    $id   Entity id.
     * @param string $type Short entity type name as accepted by getFullType().
     *
     * @return int
     */
    public function getUsageCount($id, $type)
    {
        // Called only for its validation of $type; the result is not needed.
        $this->getFullType($type);

        $cacheKey = $type . '.' . $id;
        if (array_get($this->usage, $cacheKey) === null) {
            $this->addToUsageCache($id, $type);
        }

        return array_get($this->usage, $cacheKey);
    }
193
194
    /**
     * Load document usage counts for the given entities into the $usage cache.
     *
     * Counts are read from the `entities` table. Every requested id gets an
     * entry: ids without any matching row are stored with a count of 0.
     *
     * @param array|int $entity_ids One entity id or a list of ids.
     * @param string    $type       Short entity type name as accepted by getFullType().
     */
    public function addToUsageCache($entity_ids, $type)
    {
        $fullType = $this->getFullType($type);
        $ids = is_array($entity_ids) ? $entity_ids : [$entity_ids];

        $rows = \DB::table('entities')
            ->select(['entity_id', \DB::raw('count(document_id) as doc_count')])
            ->whereIn('entity_id', $ids)
            ->where('entity_type', $fullType)
            ->groupBy('entity_id')
            ->get();

        // Seed with zero first; the grouped query returns no row for unused entities.
        foreach ($ids as $id) {
            array_set($this->usage, "{$type}.{$id}", 0);
        }

        foreach ($rows as $row) {
            array_set($this->usage, "{$type}.{$row->entity_id}", intval($row->doc_count));
        }
    }
222
223
    /**
     * Fill the $usage cache with document counts for all subjects and genres.
     *
     * Rows are read from the `entities` table in chunks of 5000. Only entity
     * types present in the type map are queried, so a row of any other type
     * can no longer cause an undefined index lookup.
     */
    public function buildCompleteUsageCache()
    {
        $typemap = ['Colligator\\Subject' => 'subject', 'Colligator\\Genre' => 'genre'];
        $query = \DB::table('entities')
                    ->select(['entity_id', 'entity_type', \DB::raw('count(document_id) as doc_count')])
                    ->whereIn('entity_type', array_keys($typemap))
                    ->groupBy('entity_id', 'entity_type');
        $query->orderBy('entity_id')->orderBy('entity_type')->chunk(5000, function ($rows) use ($typemap) {
            foreach ($rows as $row) {
                $type = $typemap[$row->entity_type];
                array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count));
            }
        });
    }
236
237
    /**
     * Add or update a document in the ElasticSearch index, making it searchable.
     *
     * @param Document $doc
     * @param int      $indexVersion When given, the document is written to the
     *                               versioned index "<esIndex>_v<version>"
     *                               instead of the default index name.
     *
     * @throws \ErrorException if ElasticSearch rejects the document with a 400
     *                         response; the original exception is attached as
     *                         the previous exception.
     */
    public function index(Document $doc, $indexVersion = null)
    {
        $payload = $this->basePayload();
        if (!is_null($indexVersion)) {
            $payload['index'] = $this->esIndex . '_v' . $indexVersion;
        }
        $payload['id'] = $doc->id;

        $sdoc = new SearchableDocument($doc, $this);
        $payload['body'] = $sdoc->toArray();

        try {
            $this->client->index($payload);
        } catch (BadRequest400Exception $e) {
            \Log::error('ElasticSearch returned error: ' . $e->getMessage() . '. Our request: ' . var_export($payload, true));

            // Code and severity are the ErrorException defaults; they are
            // spelled out only to reach the trailing "previous" argument.
            throw new \ErrorException(
                'ElasticSearch failed to index the document ' . $doc->id . '. Please see the log for payload and full error response. Error message: ' . $e->getMessage(),
                0,
                E_ERROR,
                __FILE__,
                __LINE__,
                $e
            );
        }
    }
263
264
    /**
     * Load a document by its id and (re)index it in ElasticSearch.
     *
     * The document is loaded together with its `subjects` and `cover`
     * relations and then handed to index().
     *
     * NOTE(review): findOrFail() presumably throws when no document has this
     * id - confirm which exception callers should expect.
     *
     * @param int $docId
     *
     * @throws \ErrorException
     */
    public function indexById($docId)
    {
        $doc = Document::with('subjects', 'cover')->findOrFail($docId);

        $this->index($doc);
    }
275
276
    /**
     * Create a new versioned index named "<esIndex>_v<version>" with the
     * analysis settings and field mappings used for documents.
     *
     * The mapping is registered under $esType, the same type name that
     * basePayload() sends with every request.
     *
     * @param int|null $version Version number to create; when null, the
     *                          currently active version plus one is used.
     *
     * @return int The version number of the created index.
     */
    public function createVersion($version = null)
    {
        if (is_null($version)) {
            $version = $this->getCurrentVersion() + 1;
        }

        // ISBNs are indexed as a single lowercased token with hyphens removed.
        $analysis = [
            'char_filter' => [
                'isbn_filter' => [
                    'type'        => 'pattern_replace',
                    'pattern'     => '-',
                    'replacement' => '',
                ],
            ],
            'analyzer' => [
                'isbn_analyzer' => [
                    'type'        => 'custom',
                    'char_filter' => ['isbn_filter'],
                    'tokenizer'   => 'keyword',
                    'filter'      => ['lowercase'],
                ],
            ],
        ];

        $mapping = [
            '_source' => [
                'enabled' => true,
            ],
            'properties' => [
                'id'        => ['type' => 'integer'],
                'created'   => ['type' => 'date'],
                'modified'  => ['type' => 'date'],
                'bibsys_id' => ['type' => 'keyword'],
                'isbns'     => [
                    'type'     => 'text',
                    'analyzer' => 'isbn_analyzer',
                ],
                'holdings' => [
                    'properties' => [
                        'created'  => ['type' => 'date'],
                        'acquired' => ['type' => 'date'],
                    ],
                ],
                'cover' => [
                    'properties' => [
                        'created'  => ['type' => 'date'],
                        'modified' => ['type' => 'date'],
                    ],
                ],
            ],
        ];

        $indexParams = [
            'index' => $this->esIndex . '_v' . $version,
            'body'  => [
                'settings' => ['analysis' => $analysis],
                'mappings' => [$this->esType => $mapping],
            ],
        ];
        $this->client->indices()->create($indexParams);

        return $version;
    }
324
325
    /**
     * Delete the versioned index "<esIndex>_v<version>".
     *
     * A missing index (404 from the client) is silently accepted.
     *
     * @param int $version
     */
    public function dropVersion($version)
    {
        $target = ['index' => $this->esIndex . '_v' . $version];

        try {
            $this->client->indices()->delete($target);
        } catch (Missing404Exception $e) {
            // The index did not exist in the first place, which is fine.
        }
    }
335
336
    /**
     * Append an alias action for a versioned index to a list of actions.
     *
     * Nothing is appended when $version is falsy (e.g. 0 or null).
     *
     * @param array  $actions List of alias actions, modified in place.
     * @param string $action  Action name used as the array key, e.g. 'add' or 'remove'.
     * @param int    $version Index version the action applies to.
     */
    public function addAction(&$actions, $action, $version)
    {
        if (!$version) {
            return;
        }

        $target = [
            'index' => $this->esIndex . '_v' . $version,
            'alias' => $this->esIndex,
        ];
        $actions[] = [$action => $target];
    }
342
343
    /**
     * Point the $esIndex alias at the given index version.
     *
     * The alias is removed from the currently active version (if any) and
     * added to the new one in a single updateAliases request. No request is
     * sent when there is nothing to change.
     *
     * @param int $newVersion
     */
    public function activateVersion($newVersion)
    {
        $actions = [];
        $this->addAction($actions, 'remove', $this->getCurrentVersion());
        $this->addAction($actions, 'add', $newVersion);

        if (empty($actions)) {
            return;
        }

        $body = ['actions' => $actions];
        $this->client->indices()->updateAliases(['body' => $body]);
    }
353
354
    /**
     * Check whether the versioned index "<esIndex>_v<version>" exists.
     *
     * @param int $version
     *
     * @return mixed Whatever the client's indices()->exists() call returns.
     */
    public function versionExists($version)
    {
        $indexName = $this->esIndex . '_v' . $version;

        return $this->client->indices()->exists(['index' => $indexName]);
    }
358
359
    /**
     * Determine which index version the $esIndex alias currently points at.
     *
     * The version is the number after the last "_v" in the name of the index
     * carrying the alias. If several indices carry it, the last one returned
     * by the client wins.
     *
     * @return int The active version, or 0 when no index carries the alias
     *             or its name has no "_v" suffix.
     */
    public function getCurrentVersion()
    {
        $currentIndex = null;
        foreach ($this->client->indices()->getAliases() as $index => $data) {
            // Key lookup instead of a loose in_array() over the alias names.
            if (isset($data['aliases']) && is_array($data['aliases'])
                && array_key_exists($this->esIndex, $data['aliases'])) {
                $currentIndex = $index;
            }
        }

        if (is_null($currentIndex)) {
            return 0;
        }

        // Use the LAST "_v": the base index name may itself contain "_v".
        $currentIndex = (string) $currentIndex;
        $pos = strrpos($currentIndex, '_v');

        return $pos === false ? 0 : intval(substr($currentIndex, $pos + 2));
    }
370
}
371