@@ -15,7 +15,6 @@ discard block |
||
| 15 | 15 | /** |
| 16 | 16 | * Create a new scraper. |
| 17 | 17 | * |
| 18 | - * @param Client $client |
|
| 19 | 18 | */ |
| 20 | 19 | public function __construct() |
| 21 | 20 | { |
@@ -35,6 +34,11 @@ discard block |
||
| 35 | 34 | } |
| 36 | 35 | } |
| 37 | 36 | |
| 37 | + /** |
|
| 38 | + * @param string $msg |
|
| 39 | + * @param Document $doc |
|
| 40 | + * @param string[] $args |
|
| 41 | + */ |
|
| 38 | 42 | public function notify($msg, $doc, $args, $level = 'warning') |
| 39 | 43 | { |
| 40 | 44 | $msg = vsprintf($msg, $args); |
@@ -46,6 +50,9 @@ discard block |
||
| 46 | 50 | ])->send(); |
| 47 | 51 | } |
| 48 | 52 | |
| 53 | + /** |
|
| 54 | + * @param string $url |
|
| 55 | + */ |
|
| 49 | 56 | public function scrape($url) |
| 50 | 57 | { |
| 51 | 58 | foreach ($this->scrapers as $scraper) { |
@@ -40,8 +40,8 @@ discard block |
||
| 40 | 40 | $msg = vsprintf($msg, $args); |
| 41 | 41 | $docLink = sprintf('<http://colligator.biblionaut.net/api/documents/%s|#%s> ', $doc->id, $doc->id); |
| 42 | 42 | \Slack::attach([ |
| 43 | - 'fallback' => '#' . $doc->id . ' ' . $msg, |
|
| 44 | - 'text' => $docLink . $msg, |
|
| 43 | + 'fallback' => '#'.$doc->id.' '.$msg, |
|
| 44 | + 'text' => $docLink.$msg, |
|
| 45 | 45 | 'color' => $level, |
| 46 | 46 | ])->send(); |
| 47 | 47 | } |
@@ -65,10 +65,10 @@ discard block |
||
| 65 | 65 | */ |
| 66 | 66 | public function updateDocument(Document $doc, $url) |
| 67 | 67 | { |
| 68 | - \Log::debug('[DescriptionScraper] Looking for decription for ' . $doc->id . ' at ' . $url); |
|
| 68 | + \Log::debug('[DescriptionScraper] Looking for decription for '.$doc->id.' at '.$url); |
|
| 69 | 69 | |
| 70 | 70 | if (preg_match('/(damm.no)/', $url)) { |
| 71 | - \Log::debug('[DescriptionScraper] Ignoring URL: ' . $url); |
|
| 71 | + \Log::debug('[DescriptionScraper] Ignoring URL: '.$url); |
|
| 72 | 72 | |
| 73 | 73 | return; |
| 74 | 74 | } |
@@ -76,18 +76,18 @@ discard block |
||
| 76 | 76 | try { |
| 77 | 77 | $result = $this->scrape($url); |
| 78 | 78 | } catch (TransferException $e) { |
| 79 | - \Log::error('[DescriptionScraper] Transfer failed: ' . $e->getMessage()); |
|
| 79 | + \Log::error('[DescriptionScraper] Transfer failed: '.$e->getMessage()); |
|
| 80 | 80 | $this->notify('*DescriptionScraper* failed to fetch: %s', $doc, [$url]); |
| 81 | 81 | |
| 82 | 82 | return; |
| 83 | 83 | } catch (Scrapers\ScrapeException $e) { |
| 84 | - \Log::error('[DescriptionScraper] Scraping of ' . $url . ' failed: ' . $e->getMessage()); |
|
| 84 | + \Log::error('[DescriptionScraper] Scraping of '.$url.' failed: '.$e->getMessage()); |
|
| 85 | 85 | $this->notify('*DescriptionScraper* / %s failed to find a text at: %s', $doc, [$e->getMessage(), $url]); |
| 86 | 86 | |
| 87 | 87 | return; |
| 88 | 88 | } |
| 89 | 89 | if (is_null($result)) { |
| 90 | - \Log::error('Encountered URL not recognized by any scraper: ' . $url); |
|
| 90 | + \Log::error('Encountered URL not recognized by any scraper: '.$url); |
|
| 91 | 91 | $this->notify('*DescriptionScraper* encountered URL not recognized by any sraper: %s', $doc, [$url]); |
| 92 | 92 | |
| 93 | 93 | return; |
@@ -10,7 +10,7 @@ discard block |
||
| 10 | 10 | /** |
| 11 | 11 | * Display a listing of the resource. |
| 12 | 12 | * |
| 13 | - * @return Response |
|
| 13 | + * @return \Illuminate\Http\JsonResponse |
|
| 14 | 14 | */ |
| 15 | 15 | public function index() |
| 16 | 16 | { |
@@ -24,7 +24,7 @@ discard block |
||
| 24 | 24 | * |
| 25 | 25 | * @param int $id |
| 26 | 26 | * |
| 27 | - * @return Response |
|
| 27 | + * @return \Illuminate\Http\JsonResponse |
|
| 28 | 28 | */ |
| 29 | 29 | public function show($id) |
| 30 | 30 | { |
@@ -168,6 +168,7 @@ discard block |
||
| 168 | 168 | * Returns the number of documents the subject is used on. |
| 169 | 169 | * |
| 170 | 170 | * @param int $id |
| 171 | + * @param string $type |
|
| 171 | 172 | * |
| 172 | 173 | * @return int |
| 173 | 174 | */ |
@@ -185,8 +186,8 @@ discard block |
||
| 185 | 186 | /** |
| 186 | 187 | * Build an array of document usage count per subject. |
| 187 | 188 | * |
| 188 | - * @param array|int $subject_ids |
|
| 189 | 189 | * |
| 190 | + * @param integer $entity_ids |
|
| 190 | 191 | * @return array |
| 191 | 192 | */ |
| 192 | 193 | public function addToUsageCache($entity_ids, $type) |
@@ -264,6 +265,9 @@ discard block |
||
| 264 | 265 | $this->index(Document::with('subjects', 'cover')->findOrFail($docId)); |
| 265 | 266 | } |
| 266 | 267 | |
| 268 | + /** |
|
| 269 | + * @param integer $version |
|
| 270 | + */ |
|
| 267 | 271 | public function createVersion($version = null) |
| 268 | 272 | { |
| 269 | 273 | if (is_null($version)) { |
@@ -307,6 +311,9 @@ discard block |
||
| 307 | 311 | return $version; |
| 308 | 312 | } |
| 309 | 313 | |
| 314 | + /** |
|
| 315 | + * @param integer $version |
|
| 316 | + */ |
|
| 310 | 317 | public function dropVersion($version) |
| 311 | 318 | { |
| 312 | 319 | try { |
@@ -318,6 +325,9 @@ discard block |
||
| 318 | 325 | } |
| 319 | 326 | } |
| 320 | 327 | |
| 328 | + /** |
|
| 329 | + * @param string $action |
|
| 330 | + */ |
|
| 321 | 331 | public function addAction(&$actions, $action, $version) |
| 322 | 332 | { |
| 323 | 333 | if ($version) { |
@@ -325,6 +335,9 @@ discard block |
||
| 325 | 335 | } |
| 326 | 336 | } |
| 327 | 337 | |
| 338 | + /** |
|
| 339 | + * @param integer $newVersion |
|
| 340 | + */ |
|
| 328 | 341 | public function activateVersion($newVersion) |
| 329 | 342 | { |
| 330 | 343 | $oldVersion = $this->getCurrentVersion(); |
@@ -336,6 +349,9 @@ discard block |
||
| 336 | 349 | } |
| 337 | 350 | } |
| 338 | 351 | |
| 352 | + /** |
|
| 353 | + * @param integer $version |
|
| 354 | + */ |
|
| 339 | 355 | public function versionExists($version) |
| 340 | 356 | { |
| 341 | 357 | return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]); |
@@ -100,7 +100,7 @@ discard block |
||
| 100 | 100 | public function sanitizeForQuery($value) |
| 101 | 101 | { |
| 102 | 102 | $chars = preg_quote('\\+-&|!(){}[]^~*?:'); |
| 103 | - $value = preg_replace('/([' . $chars . '])/', '\\\\\1', $value); |
|
| 103 | + $value = preg_replace('/(['.$chars.'])/', '\\\\\1', $value); |
|
| 104 | 104 | |
| 105 | 105 | return $value; |
| 106 | 106 | // |
@@ -132,20 +132,20 @@ discard block |
||
| 132 | 132 | } |
| 133 | 133 | if ($request->has('collection')) { |
| 134 | 134 | $col = Collection::findOrFail($request->collection); |
| 135 | - $query[] = 'collections:"' . $this->sanitizeForQuery($col->name) . '"'; |
|
| 135 | + $query[] = 'collections:"'.$this->sanitizeForQuery($col->name).'"'; |
|
| 136 | 136 | } |
| 137 | 137 | if ($request->has('subject')) { |
| 138 | - $query[] = '(subjects.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '"' . |
|
| 139 | - ' OR subjects.bare.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '"' . |
|
| 140 | - ' OR genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->subject) . '")'; |
|
| 138 | + $query[] = '(subjects.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->subject).'"'. |
|
| 139 | + ' OR subjects.bare.prefLabel:"'.$this->sanitizeForQuery($request->subject).'"'. |
|
| 140 | + ' OR genres.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->subject).'")'; |
|
| 141 | 141 | // TODO: Vi bør vel antakelig skille mellom X som emne og X som form/sjanger ? |
| 142 | 142 | // Men da må frontend si fra hva den ønsker, noe den ikke gjør enda. |
| 143 | 143 | } |
| 144 | 144 | if ($request->has('language')) { |
| 145 | - $query[] = 'language:"' . $this->sanitizeForQuery($request->language) . '"' ; |
|
| 145 | + $query[] = 'language:"'.$this->sanitizeForQuery($request->language).'"'; |
|
| 146 | 146 | } |
| 147 | 147 | if ($request->has('genre')) { |
| 148 | - $query[] = 'genres.noubomn.prefLabel:"' . $this->sanitizeForQuery($request->genre) . '"'; |
|
| 148 | + $query[] = 'genres.noubomn.prefLabel:"'.$this->sanitizeForQuery($request->genre).'"'; |
|
| 149 | 149 | } |
| 150 | 150 | if ($request->has('real')) { |
| 151 | 151 | dd('`real` is (very) deprecated, please use `subject` instead.'); |
@@ -183,7 +183,7 @@ discard block |
||
| 183 | 183 | public function getUsageCount($id, $type) |
| 184 | 184 | { |
| 185 | 185 | $this->getFullType($type); |
| 186 | - $arg = $type . '.' . $id; |
|
| 186 | + $arg = $type.'.'.$id; |
|
| 187 | 187 | if (is_null(array_get($this->usage, $arg))) { |
| 188 | 188 | $this->addToUsageCache($id, $type); |
| 189 | 189 | } |
@@ -212,11 +212,11 @@ discard block |
||
| 212 | 212 | ->get(); |
| 213 | 213 | |
| 214 | 214 | foreach ($entity_ids as $sid) { |
| 215 | - array_set($this->usage, $type . '.' . $sid, 0); |
|
| 215 | + array_set($this->usage, $type.'.'.$sid, 0); |
|
| 216 | 216 | } |
| 217 | 217 | |
| 218 | 218 | foreach ($res as $row) { |
| 219 | - array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count)); |
|
| 219 | + array_set($this->usage, $type.'.'.$row->entity_id, intval($row->doc_count)); |
|
| 220 | 220 | } |
| 221 | 221 | } |
| 222 | 222 | |
@@ -226,10 +226,10 @@ discard block |
||
| 226 | 226 | $query = \DB::table('entities') |
| 227 | 227 | ->select(['entity_id', 'entity_type', \DB::raw('count(document_id) as doc_count')]) |
| 228 | 228 | ->groupBy('entity_id', 'entity_type'); |
| 229 | - $query->orderBy('entity_id')->orderBy('entity_type')->chunk(5000, function ($rows) use ($typemap) { |
|
| 229 | + $query->orderBy('entity_id')->orderBy('entity_type')->chunk(5000, function($rows) use ($typemap) { |
|
| 230 | 230 | foreach ($rows as $row) { |
| 231 | 231 | $type = $typemap[$row->entity_type]; |
| 232 | - array_set($this->usage, $type . '.' . $row->entity_id, intval($row->doc_count)); |
|
| 232 | + array_set($this->usage, $type.'.'.$row->entity_id, intval($row->doc_count)); |
|
| 233 | 233 | } |
| 234 | 234 | }); |
| 235 | 235 | } |
@@ -246,7 +246,7 @@ discard block |
||
| 246 | 246 | { |
| 247 | 247 | $payload = $this->basePayload(); |
| 248 | 248 | if (!is_null($indexVersion)) { |
| 249 | - $payload['index'] = $this->esIndex . '_v' . $indexVersion; |
|
| 249 | + $payload['index'] = $this->esIndex.'_v'.$indexVersion; |
|
| 250 | 250 | } |
| 251 | 251 | $payload['id'] = $doc->id; |
| 252 | 252 | |
@@ -256,8 +256,8 @@ discard block |
||
| 256 | 256 | try { |
| 257 | 257 | $this->client->index($payload); |
| 258 | 258 | } catch (BadRequest400Exception $e) { |
| 259 | - \Log::error('ElasticSearch returned error: ' . $e->getMessage() . '. Our request: ' . var_export($payload, true)); |
|
| 260 | - throw new \ErrorException('ElasticSearch failed to index the document ' . $doc->id . '. Please see the log for payload and full error response. Error message: ' . $e->getMessage()); |
|
| 259 | + \Log::error('ElasticSearch returned error: '.$e->getMessage().'. Our request: '.var_export($payload, true)); |
|
| 260 | + throw new \ErrorException('ElasticSearch failed to index the document '.$doc->id.'. Please see the log for payload and full error response. Error message: '.$e->getMessage()); |
|
| 261 | 261 | } |
| 262 | 262 | } |
| 263 | 263 | |
@@ -278,7 +278,7 @@ discard block |
||
| 278 | 278 | if (is_null($version)) { |
| 279 | 279 | $version = $this->getCurrentVersion() + 1; |
| 280 | 280 | } |
| 281 | - $indexParams = ['index' => $this->esIndex . '_v' . $version]; |
|
| 281 | + $indexParams = ['index' => $this->esIndex.'_v'.$version]; |
|
| 282 | 282 | $indexParams['body']['settings']['analysis']['char_filter']['isbn_filter'] = [ |
| 283 | 283 | 'type' => 'pattern_replace', |
| 284 | 284 | 'pattern' => '-', |
@@ -326,7 +326,7 @@ discard block |
||
| 326 | 326 | { |
| 327 | 327 | try { |
| 328 | 328 | $this->client->indices()->delete([ |
| 329 | - 'index' => $this->esIndex . '_v' . $version, |
|
| 329 | + 'index' => $this->esIndex.'_v'.$version, |
|
| 330 | 330 | ]); |
| 331 | 331 | } catch (Missing404Exception $e) { |
| 332 | 332 | # Didn't exist in the beginning, that's ok. |
@@ -336,7 +336,7 @@ discard block |
||
| 336 | 336 | public function addAction(&$actions, $action, $version) |
| 337 | 337 | { |
| 338 | 338 | if ($version) { |
| 339 | - $actions[] = [$action => ['index' => $this->esIndex . '_v' . $version, 'alias' => $this->esIndex]]; |
|
| 339 | + $actions[] = [$action => ['index' => $this->esIndex.'_v'.$version, 'alias' => $this->esIndex]]; |
|
| 340 | 340 | } |
| 341 | 341 | } |
| 342 | 342 | |
@@ -353,7 +353,7 @@ discard block |
||
| 353 | 353 | |
| 354 | 354 | public function versionExists($version) |
| 355 | 355 | { |
| 356 | - return $this->client->indices()->exists(['index' => $this->esIndex . '_v' . $version]); |
|
| 356 | + return $this->client->indices()->exists(['index' => $this->esIndex.'_v'.$version]); |
|
| 357 | 357 | } |
| 358 | 358 | |
| 359 | 359 | public function getCurrentVersion() |
@@ -56,7 +56,7 @@ |
||
| 56 | 56 | |
| 57 | 57 | public function scrape(Crawler $crawler) |
| 58 | 58 | { |
| 59 | - $texts = $crawler->filter('#accordion > *')->each(function (Crawler $node) { |
|
| 59 | + $texts = $crawler->filter('#accordion > *')->each(function(Crawler $node) { |
|
| 60 | 60 | return $node->text(); |
| 61 | 61 | }); |
| 62 | 62 | |
@@ -13,7 +13,7 @@ |
||
| 13 | 13 | |
| 14 | 14 | public function scrape(Crawler $crawler) |
| 15 | 15 | { |
| 16 | - $texts = $crawler->filter('.productPageBody > p')->each(function (Crawler $node) { |
|
| 16 | + $texts = $crawler->filter('.productPageBody > p')->each(function(Crawler $node) { |
|
| 17 | 17 | return $node->text(); |
| 18 | 18 | }); |
| 19 | 19 | $text = implode('\n\n', $texts); |
@@ -13,7 +13,7 @@ |
||
| 13 | 13 | |
| 14 | 14 | public function scrape(Crawler $crawler) |
| 15 | 15 | { |
| 16 | - $texts = $crawler->filter('.book-details > div')->each(function (Crawler $node) { |
|
| 16 | + $texts = $crawler->filter('.book-details > div')->each(function(Crawler $node) { |
|
| 17 | 17 | if (strpos($node->attr('class'), 'row') === false) { |
| 18 | 18 | return $node->text(); |
| 19 | 19 | } |
@@ -29,7 +29,7 @@ |
||
| 29 | 29 | */ |
| 30 | 30 | public function toArray() |
| 31 | 31 | { |
| 32 | - $body = $this->doc->bibliographic; // PHP makes a copy for us |
|
| 32 | + $body = $this->doc->bibliographic; // PHP makes a copy for us |
|
| 33 | 33 | |
| 34 | 34 | $body['id'] = $this->doc->id; |
| 35 | 35 | $body['bibsys_id'] = $this->doc->bibsys_id; |
@@ -10,12 +10,12 @@ discard block |
||
| 10 | 10 | 'AA' => 'audio', |
| 11 | 11 | 'AA BA' => 'audio book', |
| 12 | 12 | 'BA' => 'book', |
| 13 | - 'BA DA' => 'ebook', // Yes, we DO actually get these |
|
| 13 | + 'BA DA' => 'ebook', // Yes, we DO actually get these |
|
| 14 | 14 | 'BB' => 'hardcover', |
| 15 | - 'BB BC' => 'book', // ... and these |
|
| 16 | - 'BB DA' => 'ebook', // ... and these |
|
| 15 | + 'BB BC' => 'book', // ... and these |
|
| 16 | + 'BB DA' => 'ebook', // ... and these |
|
| 17 | 17 | 'BC' => 'paperback', |
| 18 | - 'BC DA' => 'ebook', // ... and these |
|
| 18 | + 'BC DA' => 'ebook', // ... and these |
|
| 19 | 19 | 'DA' => 'digital', |
| 20 | 20 | 'FA' => 'film/transp.', |
| 21 | 21 | 'MA' => 'microform', |
@@ -49,7 +49,7 @@ discard block |
||
| 49 | 49 | if (isset($this->formats[$formStr])) { |
| 50 | 50 | return $this->formats[$formStr]; |
| 51 | 51 | } |
| 52 | - $formStr = implode(' ', array_map(function ($el) { |
|
| 52 | + $formStr = implode(' ', array_map(function($el) { |
|
| 53 | 53 | return $this->formats[$el]; |
| 54 | 54 | }, $forms)); |
| 55 | 55 | \Log::warning(sprintf('Unknown form: %s', $formStr)); |
@@ -40,7 +40,7 @@ discard block |
||
| 40 | 40 | //$docIndex->dropVersion(); |
| 41 | 41 | $oldVersion = $docIndex->getCurrentVersion(); |
| 42 | 42 | $newVersion = $oldVersion + 1; |
| 43 | - $this->comment(' Old version: ' . $oldVersion . ', new version: ' . $newVersion); |
|
| 43 | + $this->comment(' Old version: '.$oldVersion.', new version: '.$newVersion); |
|
| 44 | 44 | |
| 45 | 45 | if ($docIndex->versionExists($newVersion)) { |
| 46 | 46 | $this->comment(' New version already existed, probably from a crashed job. Removing.'); |
@@ -58,7 +58,7 @@ discard block |
||
| 58 | 58 | $docCount = Document::count(); |
| 59 | 59 | $this->output->progressStart($docCount); |
| 60 | 60 | |
| 61 | - Document::with('subjects', 'genres', 'cover')->chunk(1000, function ($docs) use ($docIndex, $newVersion) { |
|
| 61 | + Document::with('subjects', 'genres', 'cover')->chunk(1000, function($docs) use ($docIndex, $newVersion) { |
|
| 62 | 62 | foreach ($docs as $doc) { |
| 63 | 63 | $docIndex->index($doc, $newVersion); |
| 64 | 64 | $this->output->progressAdvance(); |
@@ -73,7 +73,7 @@ discard block |
||
| 73 | 73 | $docIndex->dropVersion($oldVersion); |
| 74 | 74 | |
| 75 | 75 | $dt = microtime(true) - $t0; |
| 76 | - $this->info(' Completed in ' . round($dt) . ' seconds.'); |
|
| 77 | - \Log::info('[ReindexJob] Completed in ' . round($dt) . ' seconds.'); |
|
| 76 | + $this->info(' Completed in '.round($dt).' seconds.'); |
|
| 77 | + \Log::info('[ReindexJob] Completed in '.round($dt).' seconds.'); |
|
| 78 | 78 | } |
| 79 | 79 | } |