Completed
Push — master ( 29bf34...2c2349 )
by Dan Michael O.
02:32
created

DocumentsIndex::createVersion()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 48
Code Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 48
rs 9.125
cc 2
eloc 34
nc 2
nop 1
1
<?php
2
3
namespace Colligator\Search;
4
5
use Colligator\Collection;
6
use Colligator\Document;
7
use Colligator\Exceptions\InvalidQueryException;
8
use Colligator\Http\Requests\SearchDocumentsRequest;
9
use Elasticsearch\Client;
10
use Elasticsearch\Common\Exceptions\BadRequest400Exception;
11
use Elasticsearch\Common\Exceptions\Missing404Exception;
12
13
/**
 * Wrapper around the ElasticSearch client for searching, indexing and
 * versioning the documents index.
 *
 * Physical indices are named "<esIndex>_v<version>"; the bare "<esIndex>"
 * name is used as an alias that activateVersion() points at one of them.
 */
class DocumentsIndex
{
    /**
     * @var string Base index name, also used as the alias name.
     */
    public $esIndex = 'documents';

    /**
     * @var string ElasticSearch document type.
     */
    public $esType = 'document';

    /**
     * @var Client
     */
    public $client;

    /**
     * @var array Usage-count cache, keyed by short entity type and then entity id.
     */
    public $usage = [];

    /**
     * @var array Map from short entity type name to the value stored in entities.entity_type.
     */
    protected $entityTypes = [
        'subject' => 'Colligator\\Subject',
        'genre'   => 'Colligator\\Genre',
    ];

    /**
     * @param Client $client
     */
    public function __construct(Client $client)
    {
        $this->client = $client;
        // The base index name can be overridden through the ES_INDEX environment variable.
        $this->esIndex = env('ES_INDEX', 'documents');
    }

    /**
     * Search for documents in ElasticSearch.
     *
     * @param SearchDocumentsRequest $request
     *
     * @throws InvalidQueryException if ElasticSearch rejects the query with a 400 response
     *
     * @return array The raw client response with an extra 'offset' key
     */
    public function search(SearchDocumentsRequest $request)
    {
        $payload = $this->basePayload();
        $payload['from'] = $request->offset ?: 0;
        $payload['size'] = $request->limit ?: 25;

        $query = $this->queryStringFromRequest($request);
        // Compare against '' rather than using empty(), which would drop the query "0".
        if ($query !== '') {
            $payload['body']['query']['query_string']['query'] = $query;
        }

        try {
            $response = $this->client->search($payload);
        } catch (BadRequest400Exception $e) {
            // The message is expected to be a JSON document with an 'error' key,
            // but fall back to the raw message if it is not.
            $decoded = json_decode($e->getMessage(), true);
            $error = (is_array($decoded) && isset($decoded['error']))
                ? $decoded['error']
                : $e->getMessage();
            throw new InvalidQueryException($error);
        }
        $response['offset'] = $payload['from'];

        return $response;
    }

    /**
     * Return a single document identified by ID.
     *
     * @param int $id
     *
     * @return array|null The document's `_source`, or null if the client reports a 404
     */
    public function get($id)
    {
        $payload = $this->basePayload();
        $payload['id'] = $id;

        try {
            $response = $this->client->get($payload);
        } catch (Missing404Exception $e) {
            return null;
        }

        return $response['_source'];
    }

    /**
     * Escape characters that are special in the Lucene query string syntax
     * by prefixing each of them with a backslash.
     * http://lucene.apache.org/core/old_versioned_docs/versions/2_9_1/queryparsersyntax.html#Escaping Special Characters.
     *
     * The double quote is escaped as well, since queryStringFromRequest()
     * places the result inside a double-quoted phrase. Words such as AND, OR
     * and NOT are not escaped here.
     *
     * @param string $value
     *
     * @return string
     */
    public function sanitizeForQuery($value)
    {
        // preg_quote() makes every character safe inside the character class;
        // passing the delimiter makes it escape '/' too.
        $chars = preg_quote('\\+-&|!(){}[]^~*?:"/', '/');

        // The replacement is a literal backslash followed by the matched character.
        return preg_replace('/([' . $chars . '])/', '\\\\$1', $value);
    }

    /**
     * Builds a query string query from a SearchDocumentsRequest.
     *
     * Each recognised request field contributes one clause; the clauses are
     * joined with " AND ". An empty string is returned if no field is set.
     *
     * @param SearchDocumentsRequest $request
     *
     * @throws InvalidQueryException if the deprecated `real` field is used
     *
     * @return string
     */
    public function queryStringFromRequest(SearchDocumentsRequest $request)
    {
        $clauses = [];
        if ($request->has('q')) {
            // Allow raw queries
            $clauses[] = $request->q;
        }
        if ($request->has('collection')) {
            $col = Collection::findOrFail($request->collection);
            $clauses[] = 'collections:"' . $this->sanitizeForQuery($col->name) . '"';
        }
        if ($request->has('subject')) {
            $subject = $this->sanitizeForQuery($request->subject);
            $clauses[] = '(subjects.noubomn.prefLabel:"' . $subject . '"' .
                ' OR subjects.bare.prefLabel:"' . $subject . '"' .
                ' OR genres.noubomn.prefLabel:"' . $subject . '")';
            // TODO: We should probably distinguish between X as subject and X as form/genre?
            //       But then the frontend has to tell us which one it wants, which it does not do yet.
        }
        if ($request->has('language')) {
            $clauses[] = 'language:"' . $this->sanitizeForQuery($request->language) . '"';
        }
        if ($request->has('genre')) {
            $clauses[] = 'genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->genre) . '"';
        }
        if ($request->has('real')) {
            // Reject the request with an exception instead of a dump-and-die debug helper.
            throw new InvalidQueryException('`real` is (very) deprecated, please use `subject` instead.');
        }

        return implode(' AND ', $clauses);
    }

    /**
     * Returns the index and type parameters shared by client requests.
     *
     * @return array
     */
    public function basePayload()
    {
        return [
            'index' => $this->esIndex,
            'type'  => $this->esType,
        ];
    }

    /**
     * Translates a short entity type name into the value stored in entities.entity_type.
     *
     * @param string $type Either 'subject' or 'genre'
     *
     * @throws \InvalidArgumentException if the type is not known
     *
     * @return string
     */
    public function getFullType($type)
    {
        if (!isset($this->entityTypes[$type])) {
            throw new \InvalidArgumentException('Unknown entity type: ' . var_export($type, true));
        }

        return $this->entityTypes[$type];
    }

    /**
     * Returns the number of documents an entity is used on, looking it up
     * in the database the first time and in the usage cache afterwards.
     *
     * @param int    $id
     * @param string $type Either 'subject' or 'genre'
     *
     * @throws \InvalidArgumentException if the type is not known
     *
     * @return int
     */
    public function getUsageCount($id, $type)
    {
        // Called for validation only: throws on unknown types.
        $this->getFullType($type);
        $arg = $type . '.' . $id;
        if (is_null(array_get($this->usage, $arg))) {
            $this->addToUsageCache($id, $type);
        }

        return array_get($this->usage, $arg);
    }

    /**
     * Looks up the document usage count for the given entities and stores
     * the counts in the usage cache. Entities without rows are cached as 0.
     *
     * @param array|int $entity_ids
     * @param string    $type       Either 'subject' or 'genre'
     *
     * @throws \InvalidArgumentException if the type is not known
     */
    public function addToUsageCache($entity_ids, $type)
    {
        $fullType = $this->getFullType($type);
        if (!is_array($entity_ids)) {
            $entity_ids = [$entity_ids];
        }
        $res = \DB::table('entities')
            ->select(['entity_id', \DB::raw('count(document_id) as doc_count')])
            ->whereIn('entity_id', $entity_ids)
            ->where('entity_type', $fullType)
            ->groupBy('entity_id')
            ->get();

        // Default everything to zero first, since entities without any
        // documents produce no row in the grouped result.
        foreach ($entity_ids as $sid) {
            array_set($this->usage, $type . '.' . $sid, 0);
        }

        foreach ($res as $row) {
            array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count));
        }
    }

    /**
     * Fills the usage cache with the document count of every entity of a
     * known type, reading the entities table in chunks of 5000 rows.
     */
    public function buildCompleteUsageCache()
    {
        $shortNames = array_flip($this->entityTypes);
        $query = \DB::table('entities')
            ->select(['entity_id', 'entity_type', \DB::raw('count(document_id) as doc_count')])
            ->groupBy('entity_id', 'entity_type')
            // Chunking pages through the result, so the order must be deterministic.
            ->orderBy('entity_type')
            ->orderBy('entity_id');
        $query->chunk(5000, function ($rows) use ($shortNames) {
            foreach ($rows as $row) {
                if (!isset($shortNames[$row->entity_type])) {
                    // Not a type we keep usage counts for.
                    continue;
                }
                $type = $shortNames[$row->entity_type];
                array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count));
            }
        });
    }

    /**
     * Add or update a document in the ElasticSearch index, making it searchable.
     *
     * @param Document $doc
     * @param int      $indexVersion If given, the versioned index is written to instead of the base index name
     *
     * @throws \ErrorException if ElasticSearch rejects the document with a 400 response
     */
    public function index(Document $doc, $indexVersion = null)
    {
        $payload = $this->basePayload();
        if (!is_null($indexVersion)) {
            $payload['index'] = $this->indexNameForVersion($indexVersion);
        }
        $payload['id'] = $doc->id;

        $sdoc = new SearchableDocument($doc, $this);
        $payload['body'] = $sdoc->toArray();

        try {
            $this->client->index($payload);
        } catch (BadRequest400Exception $e) {
            \Log::error('ElasticSearch returned error: ' . $e->getMessage() . '. Our request: ' . var_export($payload, true));
            // Keep the original exception attached as "previous".
            throw new \ErrorException(
                'ElasticSearch failed to index the document ' . $doc->id . '. Please see the log for payload and full error response. Error message: ' . $e->getMessage(),
                0,
                E_ERROR,
                __FILE__,
                __LINE__,
                $e
            );
        }
    }

    /**
     * Load a document with its subjects and cover, and add or update it in
     * the ElasticSearch index.
     *
     * @param int $docId
     *
     * @throws \ErrorException
     */
    public function indexById($docId)
    {
        $this->index(Document::with('subjects', 'cover')->findOrFail($docId));
    }

    /**
     * Create a new versioned index with the analysis settings and mappings
     * for documents.
     *
     * @param int|null $version Version number to create; defaults to the current version plus one
     *
     * @return int The version number of the created index
     */
    public function createVersion($version = null)
    {
        if (is_null($version)) {
            $version = $this->getCurrentVersion() + 1;
        }
        $indexParams = ['index' => $this->indexNameForVersion($version)];
        // Remove hyphens from ISBNs before they are tokenized as a single keyword.
        $indexParams['body']['settings']['analysis']['char_filter']['isbn_filter'] = [
            'type'        => 'pattern_replace',
            'pattern'     => '-',
            'replacement' => '',
        ];
        $indexParams['body']['settings']['analysis']['analyzer']['isbn_analyzer'] = [
            'type'        => 'custom',
            'char_filter' => ['isbn_filter'],
            'tokenizer'   => 'keyword',
            'filter'      => ['lowercase'],
        ];
        $indexParams['body']['mappings']['document'] = [
            '_source' => [
                'enabled' => true,
            ],
            'properties' => [
                'id'        => ['type' => 'integer'],
                'created'   => ['type' => 'date'],
                'modified'  => ['type' => 'date'],
                'bibsys_id' => ['type' => 'string', 'index' => 'not_analyzed'],
                'isbns'     => [
                    'type'     => 'string',
                    'analyzer' => 'isbn_analyzer',
                ],
                'holdings' => [
                    'properties' => [
                        'created'  => ['type' => 'date'],
                        'acquired' => ['type' => 'date'],
                    ],
                ],
                'cover' => [
                    'properties' => [
                        'created'  => ['type' => 'date'],
                        'modified' => ['type' => 'date'],
                    ],
                ],
            ],
        ];
        $this->client->indices()->create($indexParams);

        return $version;
    }

    /**
     * Delete the index for the given version. A missing index is not an error.
     *
     * @param int $version
     */
    public function dropVersion($version)
    {
        try {
            $this->client->indices()->delete([
                'index' => $this->indexNameForVersion($version),
            ]);
        } catch (Missing404Exception $e) {
            // Didn't exist in the beginning, that's ok.
        }
    }

    /**
     * Append an alias action for the given version to the list of actions.
     * Nothing is appended if the version is falsy (such as 0 or null).
     *
     * @param array  $actions List of alias actions, modified in place
     * @param string $action  Action name, 'add' or 'remove'
     * @param int    $version
     */
    public function addAction(&$actions, $action, $version)
    {
        if ($version) {
            $actions[] = [$action => ['index' => $this->indexNameForVersion($version), 'alias' => $this->esIndex]];
        }
    }

    /**
     * Point the alias at the index for the given version, removing it from
     * the currently active version in the same request.
     *
     * @param int $newVersion
     */
    public function activateVersion($newVersion)
    {
        $oldVersion = $this->getCurrentVersion();
        $actions = [];
        $this->addAction($actions, 'remove', $oldVersion);
        $this->addAction($actions, 'add', $newVersion);
        if (count($actions)) {
            $this->client->indices()->updateAliases(['body' => ['actions' => $actions]]);
        }
    }

    /**
     * Check whether the index for the given version exists.
     *
     * @param int $version
     *
     * @return bool
     */
    public function versionExists($version)
    {
        return $this->client->indices()->exists(['index' => $this->indexNameForVersion($version)]);
    }

    /**
     * Find the version number of the index the alias currently points at.
     *
     * @return int The version number, or 0 if no index carries the alias or
     *             its name has no "_v<number>" suffix
     */
    public function getCurrentVersion()
    {
        $currentIndex = null;
        foreach ($this->client->indices()->getAliases() as $index => $data) {
            if (isset($data['aliases']) && is_array($data['aliases'])
                && array_key_exists($this->esIndex, $data['aliases'])) {
                $currentIndex = (string) $index;
            }
        }
        if (is_null($currentIndex)) {
            return 0;
        }

        // Parse after the LAST "_v", since the base index name may itself contain "_v".
        $pos = strrpos($currentIndex, '_v');

        return $pos === false ? 0 : intval(substr($currentIndex, $pos + 2));
    }

    /**
     * Returns the name of the physical index for the given version.
     *
     * @param int $version
     *
     * @return string
     */
    protected function indexNameForVersion($version)
    {
        return $this->esIndex . '_v' . $version;
    }
}
366