DescriptionScraper   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 98
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 0
Metric Value
wmc 12
c 0
b 0
f 0
lcom 1
cbo 2
dl 0
loc 98
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 9 1
A register() 0 7 2
A notify() 0 10 1
A scrape() 0 10 3
A updateDocument() 0 38 5
1
<?php
2
3
namespace Colligator;
4
5
use GuzzleHttp\Exception\TransferException;
6
7
/**
 * Looks up a textual description for a document by handing its URL to the
 * first registered scraper that recognizes it, and reports failures to the
 * log and to Slack.
 */
class DescriptionScraper
{
    /** @var mixed Not used by this class; kept because it is public API. */
    public $doc;

    /** @var mixed Not used by this class; kept because it is public API. */
    public $url;

    /** @var mixed Not used by this class; kept because it is public API. */
    public $client;

    /** @var array Scraper instances, tried in registration order by scrape(). */
    protected $scrapers = [];

    /** @var int Seconds to sleep after a successful updateDocument() call. */
    public $sleepTime = 7;

    /**
     * Create a new scraper with the default set of site scrapers registered.
     */
    public function __construct()
    {
        $this->register([
            Scrapers\BsScraper::class,
            Scrapers\LocScraper::class,
            Scrapers\FluxScraper::class,
            Scrapers\UnivScraper::class,
        ]);
    }

    /**
     * Replace the list of registered scrapers.
     *
     * Any previously registered scrapers are discarded. Each class is
     * instantiated without constructor arguments.
     *
     * @param array $scrapers Class names of the scrapers to instantiate
     */
    public function register($scrapers)
    {
        $this->scrapers = [];
        foreach ($scrapers as $scraper) {
            $this->scrapers[] = new $scraper(); // We could do dependency injection here
        }
    }

    /**
     * Send a Slack notification about a document.
     *
     * The message is prefixed with a link to the document in the Colligator API.
     *
     * @param string $msg   Message template in vsprintf() format
     * @param object $doc   Document the message concerns; its `id` property is read
     * @param array  $args  Values substituted into $msg
     * @param string $level Passed to Slack as the attachment color
     */
    public function notify($msg, $doc, $args, $level = 'warning')
    {
        $msg = vsprintf($msg, $args);
        $docLink = sprintf('<http://colligator.biblionaut.net/api/documents/%s|#%s> ', $doc->id, $doc->id);
        \Slack::attach([
            'fallback' => '#' . $doc->id . ' ' . $msg,
            'text'     => $docLink . $msg,
            'color'    => $level,
        ])->send();
    }

    /**
     * Scrape a URL using the first registered scraper that recognizes it.
     *
     * Exceptions thrown by the scraper are not caught here; updateDocument()
     * handles TransferException and Scrapers\ScrapeException from this call.
     *
     * @param string $url
     *
     * @return mixed|null The scraper's result, or null if no registered scraper recognizes the URL
     */
    public function scrape($url)
    {
        foreach ($this->scrapers as $scraper) {
            if ($scraper->recognizes($url)) {
                return $scraper->scrape($scraper->getCrawler($url));
            }
        }

        return null;
    }

    /**
     * Scrape a description from the URL and set it on the document.
     *
     * On success `$doc->description` is set to an array with the keys `text`,
     * `source` and `source_url`, and the method then sleeps for
     * `$this->sleepTime` seconds (presumably to throttle requests to the
     * remote sites). The document is not saved here. On failure the problem is
     * logged, a Slack notification is sent and the document is left untouched.
     *
     * @param Document $doc
     * @param string   $url
     */
    public function updateDocument(Document $doc, $url)
    {
        \Log::debug('[DescriptionScraper] Looking for description for ' . $doc->id . ' at ' . $url);

        // The dot is escaped so that only a literal "damm.no" is ignored.
        if (preg_match('/damm\.no/', $url)) {
            \Log::debug('[DescriptionScraper] Ignoring URL: ' . $url);

            return;
        }

        try {
            $result = $this->scrape($url);
        } catch (TransferException $e) {
            \Log::error('[DescriptionScraper] Transfer failed: ' . $e->getMessage());
            $this->notify('*DescriptionScraper* failed to fetch: %s', $doc, [$url]);

            return;
        } catch (Scrapers\ScrapeException $e) {
            \Log::error('[DescriptionScraper] Scraping of ' . $url . ' failed: ' . $e->getMessage());
            $this->notify('*DescriptionScraper* / %s failed to find a text at: %s', $doc, [$e->getMessage(), $url]);

            return;
        }

        // scrape() returns null when none of the registered scrapers recognizes the URL.
        if ($result === null) {
            \Log::error('Encountered URL not recognized by any scraper: ' . $url);
            $this->notify('*DescriptionScraper* encountered URL not recognized by any scraper: %s', $doc, [$url]);

            return;
        }

        $doc->description = [
            'text'       => $result['text'],
            'source'     => $result['source'],
            'source_url' => $url,
        ];

        sleep($this->sleepTime);
    }
}
105