@@ -15,7 +15,6 @@ discard block |
||
15 | 15 | /** |
16 | 16 | * Create a new scraper. |
17 | 17 | * |
18 | - * @param Client $client |
|
19 | 18 | */ |
20 | 19 | public function __construct() |
21 | 20 | { |
@@ -35,6 +34,11 @@ discard block |
||
35 | 34 | } |
36 | 35 | } |
37 | 36 | |
37 | + /** |
|
38 | + * @param string $msg |
|
39 | + * @param Document $doc |
|
40 | + * @param string[] $args |
|
41 | + */ |
|
38 | 42 | public function notify($msg, $doc, $args, $level = 'warning') |
39 | 43 | { |
40 | 44 | $msg = vsprintf($msg, $args); |
@@ -46,6 +50,9 @@ discard block |
||
46 | 50 | ])->send(); |
47 | 51 | } |
48 | 52 | |
53 | + /** |
|
54 | + * @param string $url |
|
55 | + */ |
|
49 | 56 | public function scrape($url) |
50 | 57 | { |
51 | 58 | foreach ($this->scrapers as $scraper) { |
@@ -40,8 +40,8 @@ discard block |
||
40 | 40 | $msg = vsprintf($msg, $args); |
41 | 41 | $docLink = sprintf('<http://colligator.biblionaut.net/api/documents/%s|#%s> ', $doc->id, $doc->id); |
42 | 42 | \Slack::attach([ |
43 | - 'fallback' => '#' . $doc->id . ' ' . $msg, |
|
44 | - 'text' => $docLink . $msg, |
|
43 | + 'fallback' => '#'.$doc->id.' '.$msg, |
|
44 | + 'text' => $docLink.$msg, |
|
45 | 45 | 'color' => $level, |
46 | 46 | ])->send(); |
47 | 47 | } |
@@ -65,10 +65,10 @@ discard block |
||
65 | 65 | */ |
66 | 66 | public function updateDocument(Document $doc, $url) |
67 | 67 | { |
68 | - \Log::debug('[DescriptionScraper] Looking for decription for ' . $doc->id . ' at ' . $url); |
|
68 | + \Log::debug('[DescriptionScraper] Looking for decription for '.$doc->id.' at '.$url); |
|
69 | 69 | |
70 | 70 | if (preg_match('/(damm.no)/', $url)) { |
71 | - \Log::debug('[DescriptionScraper] Ignoring URL: ' . $url); |
|
71 | + \Log::debug('[DescriptionScraper] Ignoring URL: '.$url); |
|
72 | 72 | |
73 | 73 | return; |
74 | 74 | } |
@@ -76,18 +76,18 @@ discard block |
||
76 | 76 | try { |
77 | 77 | $result = $this->scrape($url); |
78 | 78 | } catch (TransferException $e) { |
79 | - \Log::error('[DescriptionScraper] Transfer failed: ' . $e->getMessage()); |
|
79 | + \Log::error('[DescriptionScraper] Transfer failed: '.$e->getMessage()); |
|
80 | 80 | $this->notify('*DescriptionScraper* failed to fetch: %s', $doc, [$url]); |
81 | 81 | |
82 | 82 | return; |
83 | 83 | } catch (Scrapers\ScrapeException $e) { |
84 | - \Log::error('[DescriptionScraper] Scraping of ' . $url . ' failed: ' . $e->getMessage()); |
|
84 | + \Log::error('[DescriptionScraper] Scraping of '.$url.' failed: '.$e->getMessage()); |
|
85 | 85 | $this->notify('*DescriptionScraper* / %s failed to find a text at: %s', $doc, [$e->getMessage(), $url]); |
86 | 86 | |
87 | 87 | return; |
88 | 88 | } |
89 | 89 | if (is_null($result)) { |
90 | - \Log::error('Encountered URL not recognized by any scraper: ' . $url); |
|
90 | + \Log::error('Encountered URL not recognized by any scraper: '.$url); |
|
91 | 91 | $this->notify('*DescriptionScraper* encountered URL not recognized by any sraper: %s', $doc, [$url]); |
92 | 92 | |
93 | 93 | return; |
@@ -10,7 +10,7 @@ discard block |
||
10 | 10 | /** |
11 | 11 | * Display a listing of the resource. |
12 | 12 | * |
13 | - * @return Response |
|
13 | + * @return \Illuminate\Http\JsonResponse |
|
14 | 14 | */ |
15 | 15 | public function index() |
16 | 16 | { |
@@ -24,7 +24,7 @@ discard block |
||
24 | 24 | * |
25 | 25 | * @param int $id |
26 | 26 | * |
27 | - * @return Response |
|
27 | + * @return \Illuminate\Http\JsonResponse |
|
28 | 28 | */ |
29 | 29 | public function show($id) |
30 | 30 | { |
@@ -168,6 +168,7 @@ discard block |
||
168 | 168 | * Returns the number of documents the subject is used on. |
169 | 169 | * |
170 | 170 | * @param int $id |
171 | + * @param string $type |
|
171 | 172 | * |
172 | 173 | * @return int |
173 | 174 | */ |
@@ -185,8 +186,8 @@ discard block |
||
185 | 186 | /** |
186 | 187 | * Build an array of document usage count per subject. |
187 | 188 | * |
188 | - * @param array|int $subject_ids |
|
189 | 189 | * |
190 | + * @param integer $entity_ids |
|
190 | 191 | * @return array |
191 | 192 | */ |
192 | 193 | public function addToUsageCache($entity_ids, $type) |
@@ -264,6 +265,9 @@ discard block |
||
264 | 265 | $this->index(Document::with('subjects', 'cover')->findOrFail($docId)); |
265 | 266 | } |
266 | 267 | |
268 | + /** |
|
269 | + * @param integer $version |
|
270 | + */ |
|
267 | 271 | public function createVersion($version = null) |
268 | 272 | { |
269 | 273 | if (is_null($version)) { |
@@ -307,6 +311,9 @@ discard block |
||
307 | 311 | return $version; |
308 | 312 | } |
309 | 313 | |
314 | + /** |
|
315 | + * @param integer $version |
|
316 | + */ |
|
310 | 317 | public function dropVersion($version) |
311 | 318 | { |
312 | 319 | try { |
@@ -318,6 +325,9 @@ discard block |
||
318 | 325 | } |
319 | 326 | } |
320 | 327 | |
328 | + /** |
|
329 | + * @param string $action |
|
330 | + */ |
|
321 | 331 | public function addAction(&$actions, $action, $version) |
322 | 332 | { |
323 | 333 | if ($version) { |
@@ -325,6 +335,9 @@ discard block |
||
325 | 335 | } |
326 | 336 | } |
327 | 337 | |
338 | + /** |
|
339 | + * @param integer $newVersion |
|
340 | + */ |
|
328 | 341 | public function activateVersion($newVersion) |
329 | 342 | { |
330 | 343 | $oldVersion = $this->getCurrentVersion(); |
@@ -336,6 +349,9 @@ discard block |
||
336 | 349 | } |
337 | 350 | } |
338 | 351 | |
352 | + /** |
|
353 | + * @param integer $version |
|
354 | + */ |
|
339 | 355 | public function versionExists($version) |
340 | 356 | { |
341 | 357 | return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]); |
@@ -100,7 +100,7 @@ discard block |
||
100 | 100 | public function sanitizeForQuery($value) |
101 | 101 | { |
102 | 102 | $chars = preg_quote('\\+-&|!(){}[]^~*?:'); |
103 | - $value = preg_replace('/([' . $chars . '])/', '\\\\\1', $value); |
|
103 | + $value = preg_replace('/(['.$chars.'])/', '\\\\\1', $value); |
|
104 | 104 | |
105 | 105 | return $value; |
106 | 106 | // |
@@ -132,20 +132,20 @@ discard block |
||
132 | 132 | } |
133 | 133 | if ($request->has('collection')) { |
134 | 134 | $col = Collection::findOrFail($request->collection); |
135 | - $query[] = 'collections:"' . $this->sanitizeForQuery($col->name) . '"'; |
|
135 | + $query[] = 'collections:"'.$this->sanitizeForQuery($col->name).'"'; |
|
136 | 136 | } |
137 | 137 | if ($request->has('subject')) { |
138 | - $query[] = '(subjects.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '"' . |
|
139 | - ' OR subjects.bare.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '"' . |
|
140 | - ' OR genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '")'; |
|
138 | + $query[] = '(subjects.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->subject).'"'. |
|
139 | + ' OR subjects.bare.prefLabel:"'.$this->sanitizeForQuery($request->subject).'"'. |
|
140 | + ' OR genres.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->subject).'")'; |
|
141 | 141 | // TODO: Vi bør vel antakelig skille mellom X som emne og X som form/sjanger ? |
142 | 142 | // Men da må frontend si fra hva den ønsker, noe den ikke gjør enda. |
143 | 143 | } |
144 | 144 | if ($request->has('language')) { |
145 | - $query[] = 'language:"' . $this->sanitizeForQuery($request->language) . '"' ; |
|
145 | + $query[] = 'language:"'.$this->sanitizeForQuery($request->language).'"'; |
|
146 | 146 | } |
147 | 147 | if ($request->has('genre')) { |
148 | - $query[] = 'genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->genre) . '"'; |
|
148 | + $query[] = 'genres.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->genre).'"'; |
|
149 | 149 | } |
150 | 150 | if ($request->has('real')) { |
151 | 151 | dd('`real` is (very) deprecated, please use `subject` instead.'); |
@@ -183,7 +183,7 @@ discard block |
||
183 | 183 | public function getUsageCount($id, $type) |
184 | 184 | { |
185 | 185 | $this->getFullType($type); |
186 | - $arg = $type . '.' . $id; |
|
186 | + $arg = $type.'.'.$id; |
|
187 | 187 | if (is_null(array_get($this->usage, $arg))) { |
188 | 188 | $this->addToUsageCache($id, $type); |
189 | 189 | } |
@@ -212,11 +212,11 @@ discard block |
||
212 | 212 | ->get(); |
213 | 213 | |
214 | 214 | foreach ($entity_ids as $sid) { |
215 | - array_set($this->usage, $type . '.' . $sid, 0); |
|
215 | + array_set($this->usage, $type.'.'.$sid, 0); |
|
216 | 216 | } |
217 | 217 | |
218 | 218 | foreach ($res as $row) { |
219 | - array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count)); |
|
219 | + array_set($this->usage, $type.'.'.$row->entity_id, intval($row->doc_count)); |
|
220 | 220 | } |
221 | 221 | } |
222 | 222 | |
@@ -226,10 +226,10 @@ discard block |
||
226 | 226 | $query = \DB::table('entities') |
227 | 227 | ->select(['entity_id', 'entity_type', \DB::raw('count(document_id) as doc_count')]) |
228 | 228 | ->groupBy('entity_id', 'entity_type'); |
229 | - $query->orderBy('entity_id')->orderBy('entity_type')->chunk(5000, function ($rows) use ($typemap) { |
|
229 | + $query->orderBy('entity_id')->orderBy('entity_type')->chunk(5000, function($rows) use ($typemap) { |
|
230 | 230 | foreach ($rows as $row) { |
231 | 231 | $type = $typemap[$row->entity_type]; |
232 | - array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count)); |
|
232 | + array_set($this->usage, $type.'.'.$row->entity_id, intval($row->doc_count)); |
|
233 | 233 | } |
234 | 234 | }); |
235 | 235 | } |
@@ -246,7 +246,7 @@ discard block |
||
246 | 246 | { |
247 | 247 | $payload = $this->basePayload(); |
248 | 248 | if (!is_null($indexVersion)) { |
249 | - $payload['index'] = $this->esIndex . '_v' . $indexVersion; |
|
249 | + $payload['index'] = $this->esIndex.'_v'.$indexVersion; |
|
250 | 250 | } |
251 | 251 | $payload['id'] = $doc->id; |
252 | 252 | |
@@ -256,8 +256,8 @@ discard block |
||
256 | 256 | try { |
257 | 257 | $this->client->index($payload); |
258 | 258 | } catch (BadRequest400Exception $e) { |
259 | - \Log::error('ElasticSearch returned error: ' . $e->getMessage() . '. Our request: ' . var_export($payload, true)); |
|
260 | - throw new \ErrorException('ElasticSearch failed to index the document ' . $doc->id . '. Please see the log for payload and full error response. Error message: ' . $e->getMessage()); |
|
259 | + \Log::error('ElasticSearch returned error: '.$e->getMessage().'. Our request: '.var_export($payload, true)); |
|
260 | + throw new \ErrorException('ElasticSearch failed to index the document '.$doc->id.'. Please see the log for payload and full error response. Error message: '.$e->getMessage()); |
|
261 | 261 | } |
262 | 262 | } |
263 | 263 | |
@@ -278,7 +278,7 @@ discard block |
||
278 | 278 | if (is_null($version)) { |
279 | 279 | $version = $this->getCurrentVersion() + 1; |
280 | 280 | } |
281 | - $indexParams = ['index' => $this->esIndex . '_v' . $version]; |
|
281 | + $indexParams = ['index' => $this->esIndex.'_v'.$version]; |
|
282 | 282 | $indexParams['body']['settings']['analysis']['char_filter']['isbn_filter'] = [ |
283 | 283 | 'type' => 'pattern_replace', |
284 | 284 | 'pattern' => '-', |
@@ -326,7 +326,7 @@ discard block |
||
326 | 326 | { |
327 | 327 | try { |
328 | 328 | $this->client->indices()->delete([ |
329 | - 'index' => $this->esIndex . '_v' . $version, |
|
329 | + 'index' => $this->esIndex.'_v'.$version, |
|
330 | 330 | ]); |
331 | 331 | } catch (Missing404Exception $e) { |
332 | 332 | # Didn't exist in the beginning, that's ok. |
@@ -336,7 +336,7 @@ discard block |
||
336 | 336 | public function addAction(&$actions, $action, $version) |
337 | 337 | { |
338 | 338 | if ($version) { |
339 | - $actions[] = [$action => ['index' => $this->esIndex . '_v' . $version, 'alias' => $this->esIndex]]; |
|
339 | + $actions[] = [$action => ['index' => $this->esIndex.'_v'.$version, 'alias' => $this->esIndex]]; |
|
340 | 340 | } |
341 | 341 | } |
342 | 342 | |
@@ -353,7 +353,7 @@ discard block |
||
353 | 353 | |
354 | 354 | public function versionExists($version) |
355 | 355 | { |
356 | - return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]); |
|
356 | + return $this->client->indices()->exists(['index' => $this->esIndex.'_v'.$version]); |
|
357 | 357 | } |
358 | 358 | |
359 | 359 | public function getCurrentVersion() |
@@ -56,7 +56,7 @@ |
||
56 | 56 | |
57 | 57 | public function scrape(Crawler $crawler) |
58 | 58 | { |
59 | - $texts = $crawler->filter('#accordion > *')->each(function (Crawler $node) { |
|
59 | + $texts = $crawler->filter('#accordion > *')->each(function(Crawler $node) { |
|
60 | 60 | return $node->text(); |
61 | 61 | }); |
62 | 62 |
@@ -13,7 +13,7 @@ |
||
13 | 13 | |
14 | 14 | public function scrape(Crawler $crawler) |
15 | 15 | { |
16 | - $texts = $crawler->filter('.productPageBody > p')->each(function (Crawler $node) { |
|
16 | + $texts = $crawler->filter('.productPageBody > p')->each(function(Crawler $node) { |
|
17 | 17 | return $node->text(); |
18 | 18 | }); |
19 | 19 | $text = implode('\n\n', $texts); |
@@ -13,7 +13,7 @@ |
||
13 | 13 | |
14 | 14 | public function scrape(Crawler $crawler) |
15 | 15 | { |
16 | - $texts = $crawler->filter('.book-details > div')->each(function (Crawler $node) { |
|
16 | + $texts = $crawler->filter('.book-details > div')->each(function(Crawler $node) { |
|
17 | 17 | if (strpos($node->attr('class'), 'row') === false) { |
18 | 18 | return $node->text(); |
19 | 19 | } |
@@ -29,7 +29,7 @@ |
||
29 | 29 | */ |
30 | 30 | public function toArray() |
31 | 31 | { |
32 | - $body = $this->doc->bibliographic; // PHP makes a copy for us |
|
32 | + $body = $this->doc->bibliographic; // PHP makes a copy for us |
|
33 | 33 | |
34 | 34 | $body['id'] = $this->doc->id; |
35 | 35 | $body['bibsys_id'] = $this->doc->bibsys_id; |
@@ -10,12 +10,12 @@ discard block |
||
10 | 10 | 'AA' => 'audio', |
11 | 11 | 'AA BA' => 'audio book', |
12 | 12 | 'BA' => 'book', |
13 | - 'BA DA' => 'ebook', // Yes, we DO actually get these |
|
13 | + 'BA DA' => 'ebook', // Yes, we DO actually get these |
|
14 | 14 | 'BB' => 'hardcover', |
15 | - 'BB BC' => 'book', // ... and these |
|
16 | - 'BB DA' => 'ebook', // ... and these |
|
15 | + 'BB BC' => 'book', // ... and these |
|
16 | + 'BB DA' => 'ebook', // ... and these |
|
17 | 17 | 'BC' => 'paperback', |
18 | - 'BC DA' => 'ebook', // ... and these |
|
18 | + 'BC DA' => 'ebook', // ... and these |
|
19 | 19 | 'DA' => 'digital', |
20 | 20 | 'FA' => 'film/transp.', |
21 | 21 | 'MA' => 'microform', |
@@ -49,7 +49,7 @@ discard block |
||
49 | 49 | if (isset($this->formats[$formStr])) { |
50 | 50 | return $this->formats[$formStr]; |
51 | 51 | } |
52 | - $formStr = implode(' ', array_map(function ($el) { |
|
52 | + $formStr = implode(' ', array_map(function($el) { |
|
53 | 53 | return $this->formats[$el]; |
54 | 54 | }, $forms)); |
55 | 55 | \Log::warning(sprintf('Unknown form: %s', $formStr)); |
@@ -40,7 +40,7 @@ discard block |
||
40 | 40 | //$docIndex->dropVersion(); |
41 | 41 | $oldVersion = $docIndex->getCurrentVersion(); |
42 | 42 | $newVersion = $oldVersion + 1; |
43 | - $this->comment(' Old version: ' . $oldVersion . ', new version: ' . $newVersion); |
|
43 | + $this->comment(' Old version: '.$oldVersion.', new version: '.$newVersion); |
|
44 | 44 | |
45 | 45 | if ($docIndex->versionExists($newVersion)) { |
46 | 46 | $this->comment(' New version already existed, probably from a crashed job. Removing.'); |
@@ -58,7 +58,7 @@ discard block |
||
58 | 58 | $docCount = Document::count(); |
59 | 59 | $this->output->progressStart($docCount); |
60 | 60 | |
61 | - Document::with('subjects', 'genres', 'cover')->chunk(1000, function ($docs) use ($docIndex, $newVersion) { |
|
61 | + Document::with('subjects', 'genres', 'cover')->chunk(1000, function($docs) use ($docIndex, $newVersion) { |
|
62 | 62 | foreach ($docs as $doc) { |
63 | 63 | $docIndex->index($doc, $newVersion); |
64 | 64 | $this->output->progressAdvance(); |
@@ -73,7 +73,7 @@ discard block |
||
73 | 73 | $docIndex->dropVersion($oldVersion); |
74 | 74 | |
75 | 75 | $dt = microtime(true) - $t0; |
76 | - $this->info(' Completed in ' . round($dt) . ' seconds.'); |
|
77 | - \Log::info('[ReindexJob] Completed in ' . round($dt) . ' seconds.'); |
|
76 | + $this->info(' Completed in '.round($dt).' seconds.'); |
|
77 | + \Log::info('[ReindexJob] Completed in '.round($dt).' seconds.'); |
|
78 | 78 | } |
79 | 79 | } |