@@ -15,7 +15,6 @@ discard block |
||
15 | 15 | /** |
16 | 16 | * Create a new scraper. |
17 | 17 | * |
18 | - * @param Client $client |
|
19 | 18 | */ |
20 | 19 | public function __construct() |
21 | 20 | { |
@@ -35,6 +34,11 @@ discard block |
||
35 | 34 | } |
36 | 35 | } |
37 | 36 | |
37 | + /** |
|
38 | + * @param string $msg |
|
39 | + * @param Document $doc |
|
40 | + * @param string[] $args |
|
41 | + */ |
|
38 | 42 | public function notify($msg, $doc, $args, $level = 'warning') |
39 | 43 | { |
40 | 44 | $msg = vsprintf($msg, $args); |
@@ -46,6 +50,9 @@ discard block |
||
46 | 50 | ])->send(); |
47 | 51 | } |
48 | 52 | |
53 | + /** |
|
54 | + * @param string $url |
|
55 | + */ |
|
49 | 56 | public function scrape($url) |
50 | 57 | { |
51 | 58 | foreach ($this->scrapers as $scraper) { |
@@ -40,8 +40,8 @@ discard block |
||
40 | 40 | $msg = vsprintf($msg, $args); |
41 | 41 | $docLink = sprintf('<http://colligator.biblionaut.net/api/documents/%s|#%s> ', $doc->id, $doc->id); |
42 | 42 | \Slack::attach([ |
43 | - 'fallback' => '#' . $doc->id . ' ' . $msg, |
|
44 | - 'text' => $docLink . $msg, |
|
43 | + 'fallback' => '#'.$doc->id.' '.$msg, |
|
44 | + 'text' => $docLink.$msg, |
|
45 | 45 | 'color' => $level, |
46 | 46 | ])->send(); |
47 | 47 | } |
@@ -65,10 +65,10 @@ discard block |
||
65 | 65 | */ |
66 | 66 | public function updateDocument(Document $doc, $url) |
67 | 67 | { |
68 | - \Log::debug('[DescriptionScraper] Looking for decription for ' . $doc->id . ' at ' . $url); |
|
68 | + \Log::debug('[DescriptionScraper] Looking for decription for '.$doc->id.' at '.$url); |
|
69 | 69 | |
70 | 70 | if (preg_match('/(damm.no)/', $url)) { |
71 | - \Log::debug('[DescriptionScraper] Ignoring URL: ' . $url); |
|
71 | + \Log::debug('[DescriptionScraper] Ignoring URL: '.$url); |
|
72 | 72 | |
73 | 73 | return; |
74 | 74 | } |
@@ -76,18 +76,18 @@ discard block |
||
76 | 76 | try { |
77 | 77 | $result = $this->scrape($url); |
78 | 78 | } catch (TransferException $e) { |
79 | - \Log::error('[DescriptionScraper] Transfer failed: ' . $e->getMessage()); |
|
79 | + \Log::error('[DescriptionScraper] Transfer failed: '.$e->getMessage()); |
|
80 | 80 | $this->notify('*DescriptionScraper* failed to fetch: %s', $doc, [$url]); |
81 | 81 | |
82 | 82 | return; |
83 | 83 | } catch (Scrapers\ScrapeException $e) { |
84 | - \Log::error('[DescriptionScraper] Scraping of ' . $url . ' failed: ' . $e->getMessage()); |
|
84 | + \Log::error('[DescriptionScraper] Scraping of '.$url.' failed: '.$e->getMessage()); |
|
85 | 85 | $this->notify('*DescriptionScraper* / %s failed to find a text at: %s', $doc, [$e->getMessage(), $url]); |
86 | 86 | |
87 | 87 | return; |
88 | 88 | } |
89 | 89 | if (is_null($result)) { |
90 | - \Log::error('Encountered URL not recognized by any scraper: ' . $url); |
|
90 | + \Log::error('Encountered URL not recognized by any scraper: '.$url); |
|
91 | 91 | $this->notify('*DescriptionScraper* encountered URL not recognized by any sraper: %s', $doc, [$url]); |
92 | 92 | |
93 | 93 | return; |
@@ -10,7 +10,7 @@ discard block |
||
10 | 10 | /** |
11 | 11 | * Display a listing of the resource. |
12 | 12 | * |
13 | - * @return Response |
|
13 | + * @return \Illuminate\Http\JsonResponse |
|
14 | 14 | */ |
15 | 15 | public function index() |
16 | 16 | { |
@@ -24,7 +24,7 @@ discard block |
||
24 | 24 | * |
25 | 25 | * @param int $id |
26 | 26 | * |
27 | - * @return Response |
|
27 | + * @return \Illuminate\Http\JsonResponse |
|
28 | 28 | */ |
29 | 29 | public function show($id) |
30 | 30 | { |
@@ -2,7 +2,6 @@ |
||
2 | 2 | |
3 | 3 | namespace Colligator\Http\Controllers; |
4 | 4 | |
5 | -use Colligator\Cover; |
|
6 | 5 | use Colligator\Document; |
7 | 6 | use Colligator\Http\Requests\SearchDocumentsRequest; |
8 | 7 | use Colligator\Search\DocumentsIndex; |
@@ -13,7 +13,7 @@ discard block |
||
13 | 13 | /** |
14 | 14 | * Display a listing of the resource. |
15 | 15 | * |
16 | - * @return Response |
|
16 | + * @return \Illuminate\Http\JsonResponse |
|
17 | 17 | */ |
18 | 18 | public function index(SearchDocumentsRequest $request, DocumentsIndex $se) |
19 | 19 | { |
@@ -65,7 +65,7 @@ discard block |
||
65 | 65 | * @param DocumentsIndex $se |
66 | 66 | * @param int $id |
67 | 67 | * |
68 | - * @return Response |
|
68 | + * @return \Illuminate\Http\JsonResponse |
|
69 | 69 | */ |
70 | 70 | public function show(Request $request, DocumentsIndex $se, $id) |
71 | 71 | { |
@@ -138,7 +138,7 @@ discard block |
||
138 | 138 | /** |
139 | 139 | * Show cover. |
140 | 140 | * |
141 | - * @return Response |
|
141 | + * @return \Illuminate\Http\JsonResponse |
|
142 | 142 | */ |
143 | 143 | public function cover($document_id) |
144 | 144 | { |
@@ -152,7 +152,7 @@ discard block |
||
152 | 152 | /** |
153 | 153 | * Store cover. |
154 | 154 | * |
155 | - * @return Response |
|
155 | + * @return \Illuminate\Http\JsonResponse |
|
156 | 156 | */ |
157 | 157 | public function storeCover($document_id, Request $request, DocumentsIndex $se) |
158 | 158 | { |
@@ -204,7 +204,7 @@ discard block |
||
204 | 204 | /** |
205 | 205 | * Store description. |
206 | 206 | * |
207 | - * @return Response |
|
207 | + * @return \Illuminate\Http\JsonResponse |
|
208 | 208 | */ |
209 | 209 | public function storeDescription($document_id, Request $request, DocumentsIndex $se) |
210 | 210 | { |
@@ -235,7 +235,7 @@ discard block |
||
235 | 235 | /** |
236 | 236 | * Store "Cannot find cover" |
237 | 237 | * |
238 | - * @return Response |
|
238 | + * @return \Illuminate\Http\JsonResponse |
|
239 | 239 | */ |
240 | 240 | public function cannotFindCover($document_id, Request $request, DocumentsIndex $se) |
241 | 241 | { |
@@ -184,11 +184,11 @@ discard block |
||
184 | 184 | $cover = $cover->toArray(); |
185 | 185 | } |
186 | 186 | } catch (\ErrorException $e) { |
187 | - \Log::error('Failed to cache cover, got error: ' . $e->getMessage()); |
|
187 | + \Log::error('Failed to cache cover, got error: '.$e->getMessage()); |
|
188 | 188 | |
189 | 189 | return response()->json([ |
190 | 190 | 'result' => 'error', |
191 | - 'error' => 'Failed to store the cover. Please check that the URL points to a valid image file. Details: ' . $e->getMessage(), |
|
191 | + 'error' => 'Failed to store the cover. Please check that the URL points to a valid image file. Details: '.$e->getMessage(), |
|
192 | 192 | ]); |
193 | 193 | } |
194 | 194 | |
@@ -223,7 +223,7 @@ discard block |
||
223 | 223 | ]; |
224 | 224 | $doc->save(); |
225 | 225 | |
226 | - \Log::info('Stored new description for ' . $doc->id); |
|
226 | + \Log::info('Stored new description for '.$doc->id); |
|
227 | 227 | |
228 | 228 | $se->indexById($doc->id); |
229 | 229 | |
@@ -247,11 +247,11 @@ discard block |
||
247 | 247 | $doc->save(); |
248 | 248 | |
249 | 249 | } catch (\ErrorException $e) { |
250 | - \Log::error('Failed to store status, got error: ' . $e->getMessage()); |
|
250 | + \Log::error('Failed to store status, got error: '.$e->getMessage()); |
|
251 | 251 | |
252 | 252 | return response()->json([ |
253 | 253 | 'result' => 'error', |
254 | - 'error' => 'Failed to store status. Details: ' . $e->getMessage(), |
|
254 | + 'error' => 'Failed to store status. Details: '.$e->getMessage(), |
|
255 | 255 | ]); |
256 | 256 | } |
257 | 257 |
@@ -168,6 +168,7 @@ discard block |
||
168 | 168 | * Returns the number of documents the subject is used on. |
169 | 169 | * |
170 | 170 | * @param int $id |
171 | + * @param string $type |
|
171 | 172 | * |
172 | 173 | * @return int |
173 | 174 | */ |
@@ -185,8 +186,8 @@ discard block |
||
185 | 186 | /** |
186 | 187 | * Build an array of document usage count per subject. |
187 | 188 | * |
188 | - * @param array|int $subject_ids |
|
189 | 189 | * |
190 | + * @param array|int $entity_ids |
|
190 | 191 | * @return array |
191 | 192 | */ |
192 | 193 | public function addToUsageCache($entity_ids, $type) |
@@ -264,6 +265,9 @@ discard block |
||
264 | 265 | $this->index(Document::with('subjects', 'cover')->findOrFail($docId)); |
265 | 266 | } |
266 | 267 | |
268 | + /** |
|
269 | + * @param int|null $version |
|
270 | + */ |
|
267 | 271 | public function createVersion($version = null) |
268 | 272 | { |
269 | 273 | if (is_null($version)) { |
@@ -307,6 +311,9 @@ discard block |
||
307 | 311 | return $version; |
308 | 312 | } |
309 | 313 | |
314 | + /** |
|
315 | + * @param int $version |
|
316 | + */ |
|
310 | 317 | public function dropVersion($version) |
311 | 318 | { |
312 | 319 | try { |
@@ -318,6 +325,9 @@ discard block |
||
318 | 325 | } |
319 | 326 | } |
320 | 327 | |
328 | + /** |
|
329 | + * @param string $action |
|
330 | + */ |
|
321 | 331 | public function addAction(&$actions, $action, $version) |
322 | 332 | { |
323 | 333 | if ($version) { |
@@ -325,6 +335,9 @@ discard block |
||
325 | 335 | } |
326 | 336 | } |
327 | 337 | |
338 | + /** |
|
339 | + * @param int $newVersion |
|
340 | + */ |
|
328 | 341 | public function activateVersion($newVersion) |
329 | 342 | { |
330 | 343 | $oldVersion = $this->getCurrentVersion(); |
@@ -336,6 +349,9 @@ discard block |
||
336 | 349 | } |
337 | 350 | } |
338 | 351 | |
352 | + /** |
|
353 | + * @param int $version |
|
354 | + */ |
|
339 | 355 | public function versionExists($version) |
340 | 356 | { |
341 | 357 | return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]); |
@@ -95,7 +95,7 @@ discard block |
||
95 | 95 | public function sanitizeForQuery($value) |
96 | 96 | { |
97 | 97 | $chars = preg_quote('\\+-&|!(){}[]^~*?:'); |
98 | - $value = preg_replace('/([' . $chars . '])/', '\\\\\1', $value); |
|
98 | + $value = preg_replace('/(['.$chars.'])/', '\\\\\1', $value); |
|
99 | 99 | |
100 | 100 | return $value; |
101 | 101 | // |
@@ -127,20 +127,20 @@ discard block |
||
127 | 127 | } |
128 | 128 | if ($request->has('collection')) { |
129 | 129 | $col = Collection::findOrFail($request->collection); |
130 | - $query[] = 'collections:"' . $this->sanitizeForQuery($col->name) . '"'; |
|
130 | + $query[] = 'collections:"'.$this->sanitizeForQuery($col->name).'"'; |
|
131 | 131 | } |
132 | 132 | if ($request->has('subject')) { |
133 | - $query[] = '(subjects.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '"' . |
|
134 | - ' OR subjects.bare.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '"' . |
|
135 | - ' OR genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '")'; |
|
133 | + $query[] = '(subjects.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->subject).'"'. |
|
134 | + ' OR subjects.bare.prefLabel:"'.$this->sanitizeForQuery($request->subject).'"'. |
|
135 | + ' OR genres.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->subject).'")'; |
|
136 | 136 | // TODO: We should probably distinguish between X as a subject and X as a form/genre? |
137 | 137 | // But then the frontend has to tell us which one it wants, which it does not do yet. |
138 | 138 | } |
139 | 139 | if ($request->has('language')) { |
140 | - $query[] = 'language:"' . $this->sanitizeForQuery($request->language) . '"' ; |
|
140 | + $query[] = 'language:"'.$this->sanitizeForQuery($request->language).'"'; |
|
141 | 141 | } |
142 | 142 | if ($request->has('genre')) { |
143 | - $query[] = 'genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->genre) . '"'; |
|
143 | + $query[] = 'genres.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->genre).'"'; |
|
144 | 144 | } |
145 | 145 | if ($request->has('real')) { |
146 | 146 | dd('`real` is (very) deprecated, please use `subject` instead.'); |
@@ -178,7 +178,7 @@ discard block |
||
178 | 178 | public function getUsageCount($id, $type) |
179 | 179 | { |
180 | 180 | $this->getFullType($type); |
181 | - $arg = $type . '.' . $id; |
|
181 | + $arg = $type.'.'.$id; |
|
182 | 182 | if (is_null(array_get($this->usage, $arg))) { |
183 | 183 | $this->addToUsageCache($id, $type); |
184 | 184 | } |
@@ -207,11 +207,11 @@ discard block |
||
207 | 207 | ->get(); |
208 | 208 | |
209 | 209 | foreach ($entity_ids as $sid) { |
210 | - array_set($this->usage, $type . '.' . $sid, 0); |
|
210 | + array_set($this->usage, $type.'.'.$sid, 0); |
|
211 | 211 | } |
212 | 212 | |
213 | 213 | foreach ($res as $row) { |
214 | - array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count)); |
|
214 | + array_set($this->usage, $type.'.'.$row->entity_id, intval($row->doc_count)); |
|
215 | 215 | } |
216 | 216 | } |
217 | 217 | |
@@ -221,10 +221,10 @@ discard block |
||
221 | 221 | $query = \DB::table('entities') |
222 | 222 | ->select(['entity_id', 'entity_type', \DB::raw('count(document_id) as doc_count')]) |
223 | 223 | ->groupBy('entity_id', 'entity_type'); |
224 | - $query->chunk(5000, function ($rows) use ($typemap) { |
|
224 | + $query->chunk(5000, function($rows) use ($typemap) { |
|
225 | 225 | foreach ($rows as $row) { |
226 | 226 | $type = $typemap[$row->entity_type]; |
227 | - array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count)); |
|
227 | + array_set($this->usage, $type.'.'.$row->entity_id, intval($row->doc_count)); |
|
228 | 228 | } |
229 | 229 | }); |
230 | 230 | } |
@@ -241,7 +241,7 @@ discard block |
||
241 | 241 | { |
242 | 242 | $payload = $this->basePayload(); |
243 | 243 | if (!is_null($indexVersion)) { |
244 | - $payload['index'] = $this->esIndex . '_v' . $indexVersion; |
|
244 | + $payload['index'] = $this->esIndex.'_v'.$indexVersion; |
|
245 | 245 | } |
246 | 246 | $payload['id'] = $doc->id; |
247 | 247 | |
@@ -251,8 +251,8 @@ discard block |
||
251 | 251 | try { |
252 | 252 | $this->client->index($payload); |
253 | 253 | } catch (BadRequest400Exception $e) { |
254 | - \Log::error('ElasticSearch returned error: ' . $e->getMessage() . '. Our request: ' . var_export($payload, true)); |
|
255 | - throw new \ErrorException('ElasticSearch failed to index the document ' . $doc->id . '. Please see the log for payload and full error response. Error message: ' . $e->getMessage()); |
|
254 | + \Log::error('ElasticSearch returned error: '.$e->getMessage().'. Our request: '.var_export($payload, true)); |
|
255 | + throw new \ErrorException('ElasticSearch failed to index the document '.$doc->id.'. Please see the log for payload and full error response. Error message: '.$e->getMessage()); |
|
256 | 256 | } |
257 | 257 | } |
258 | 258 | |
@@ -273,7 +273,7 @@ discard block |
||
273 | 273 | if (is_null($version)) { |
274 | 274 | $version = $this->getCurrentVersion() + 1; |
275 | 275 | } |
276 | - $indexParams = ['index' => $this->esIndex . '_v' . $version]; |
|
276 | + $indexParams = ['index' => $this->esIndex.'_v'.$version]; |
|
277 | 277 | $indexParams['body']['settings']['analysis']['char_filter']['isbn_filter'] = [ |
278 | 278 | 'type' => 'pattern_replace', |
279 | 279 | 'pattern' => '-', |
@@ -315,7 +315,7 @@ discard block |
||
315 | 315 | { |
316 | 316 | try { |
317 | 317 | $this->client->indices()->delete([ |
318 | - 'index' => $this->esIndex . '_v' . $version, |
|
318 | + 'index' => $this->esIndex.'_v'.$version, |
|
319 | 319 | ]); |
320 | 320 | } catch (Missing404Exception $e) { |
321 | 321 | # Didn't exist in the beginning, that's ok. |
@@ -325,7 +325,7 @@ discard block |
||
325 | 325 | public function addAction(&$actions, $action, $version) |
326 | 326 | { |
327 | 327 | if ($version) { |
328 | - $actions[] = [$action => ['index' => $this->esIndex . '_v' . $version, 'alias' => $this->esIndex]]; |
|
328 | + $actions[] = [$action => ['index' => $this->esIndex.'_v'.$version, 'alias' => $this->esIndex]]; |
|
329 | 329 | } |
330 | 330 | } |
331 | 331 | |
@@ -342,7 +342,7 @@ discard block |
||
342 | 342 | |
343 | 343 | public function versionExists($version) |
344 | 344 | { |
345 | - return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]); |
|
345 | + return $this->client->indices()->exists(['index' => $this->esIndex.'_v'.$version]); |
|
346 | 346 | } |
347 | 347 | |
348 | 348 | public function getCurrentVersion() |
@@ -29,14 +29,14 @@ |
||
29 | 29 | protected function schedule(Schedule $schedule) |
30 | 30 | { |
31 | 31 | $schedule->command('colligator:harvest-oaipmh samling42 --daily') |
32 | - ->dailyAt('02:00'); |
|
32 | + ->dailyAt('02:00'); |
|
33 | 33 | |
34 | 34 | // Bring subject heading usage counts up-to-date |
35 | 35 | $schedule->command('colligator:reindex') |
36 | - ->weekly()->sundays()->at('04:00'); |
|
36 | + ->weekly()->sundays()->at('04:00'); |
|
37 | 37 | |
38 | 38 | // Check new documents for xisbn |
39 | 39 | $schedule->command('colligator:harvest-xisbn') |
40 | - ->weekly()->saturdays()->at('04:00'); |
|
40 | + ->weekly()->saturdays()->at('04:00'); |
|
41 | 41 | } |
42 | 42 | } |
@@ -20,7 +20,7 @@ |
||
20 | 20 | */ |
21 | 21 | public function register() |
22 | 22 | { |
23 | - \App::bind('covercache', function () { |
|
23 | + \App::bind('covercache', function() { |
|
24 | 24 | return new CoverCache(); |
25 | 25 | }); |
26 | 26 | } |
@@ -56,7 +56,7 @@ |
||
56 | 56 | |
57 | 57 | public function scrape(Crawler $crawler) |
58 | 58 | { |
59 | - $texts = $crawler->filter('#accordion > *')->each(function (Crawler $node) { |
|
59 | + $texts = $crawler->filter('#accordion > *')->each(function(Crawler $node) { |
|
60 | 60 | return $node->text(); |
61 | 61 | }); |
62 | 62 |
@@ -13,7 +13,7 @@ |
||
13 | 13 | |
14 | 14 | public function scrape(Crawler $crawler) |
15 | 15 | { |
16 | - $texts = $crawler->filter('.productPageBody > p')->each(function (Crawler $node) { |
|
16 | + $texts = $crawler->filter('.productPageBody > p')->each(function(Crawler $node) { |
|
17 | 17 | return $node->text(); |
18 | 18 | }); |
19 | 19 | $text = implode('\n\n', $texts); |
@@ -13,7 +13,7 @@ |
||
13 | 13 | |
14 | 14 | public function scrape(Crawler $crawler) |
15 | 15 | { |
16 | - $texts = $crawler->filter('.book-details > div')->each(function (Crawler $node) { |
|
16 | + $texts = $crawler->filter('.book-details > div')->each(function(Crawler $node) { |
|
17 | 17 | if (strpos($node->attr('class'), 'row') === false) { |
18 | 18 | return $node->text(); |
19 | 19 | } |