DescriptionScraper - Code Metrics - scriptotek/colligator-backend - Measure and Improve Code Quality continuously with Scrutinizer

DescriptionScraper A
last analyzed 2018-08-07 10:50 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	98
Duplicated Lines	0 %

Coupling/Cohesion

Components	1
Dependencies	2

Importance

Changes

Metric	Value
wmc	12
c	0
b	0
f	0
lcom	1
cbo	2
dl	0
loc	98
rs	10

5 Methods

Rating	Name	Size	Complexity
A	__construct()	9	1
A	register()	7	2
A	notify()	10	1
A	scrape()	10	3
A	updateDocument()	38	5

<?php

namespace Colligator;

use GuzzleHttp\Exception\TransferException;

/**
 * Looks up a textual description for a document by delegating to the first
 * registered site scraper that recognizes the given URL.
 *
 * Failures are logged and reported to Slack instead of being thrown to the
 * caller of updateDocument().
 */
class DescriptionScraper
{
    // NOTE(review): $doc, $url and $client are not referenced anywhere in this
    // class; they are kept because they are public and may be used by callers.
    public $doc;
    public $url;
    public $client;

    /**
     * Scraper instances, tried in registration order.
     *
     * @var array
     */
    protected $scrapers;

    /**
     * Seconds to sleep after a successful scrape (passed to sleep()).
     * Presumably a politeness delay between requests — confirm with callers.
     *
     * @var int
     */
    public $sleepTime = 7;

    /**
     * Create a new scraper with the default set of site scrapers registered.
     */
    public function __construct()
    {
        $this->register([
            Scrapers\BsScraper::class,
            Scrapers\LocScraper::class,
            Scrapers\FluxScraper::class,
            Scrapers\UnivScraper::class,
        ]);
    }

    /**
     * Replace the list of registered scrapers.
     *
     * Any previously registered scrapers are discarded. Each class is
     * instantiated without constructor arguments.
     *
     * @param string[] $scrapers Scraper class names
     */
    public function register($scrapers)
    {
        $this->scrapers = [];
        foreach ($scrapers as $scraper) {
            $this->scrapers[] = new $scraper(); // We could do dependency injection here
        }
    }

    /**
     * Send a Slack attachment about a document.
     *
     * @param string $msg   vsprintf() format string
     * @param object $doc   Document the message concerns; only its `id` is read
     * @param array  $args  Values substituted into $msg
     * @param string $level Used as the attachment's `color`
     */
    public function notify($msg, $doc, $args, $level = 'warning')
    {
        $msg = vsprintf($msg, $args);
        // `<url|label>` is Slack's link markup; the fallback text below is plain.
        $docLink = sprintf('<http://colligator.biblionaut.net/api/documents/%s|#%s> ', $doc->id, $doc->id);
        \Slack::attach([
            'fallback' => '#' . $doc->id . ' ' . $msg,
            'text'     => $docLink . $msg,
            'color'    => $level,
        ])->send();
    }

    /**
     * Scrape a URL using the first registered scraper that recognizes it.
     *
     * Exceptions thrown by the scraper are not caught here.
     *
     * @param string $url
     *
     * @return mixed|null The scraper's result, or null if no scraper recognizes the URL
     */
    public function scrape($url)
    {
        foreach ($this->scrapers as $scraper) {
            if ($scraper->recognizes($url)) {
                return $scraper->scrape($scraper->getCrawler($url));
            }
        }

        // Explicit null: updateDocument() distinguishes "unrecognized" via is_null().
        return null;
    }

    /**
     * Scrape a description from the URL and assign it to the document.
     *
     * On success, `$doc->description` is set to an array with the keys `text`,
     * `source` and `source_url`, and the method then sleeps for `$sleepTime`
     * seconds. This method does not call save on the document itself.
     *
     * The document is left untouched (and no sleep happens) when the URL is
     * ignored, the transfer fails, the scraper throws a ScrapeException, or
     * no scraper recognizes the URL.
     *
     * @param Document $doc
     * @param string   $url
     */
    public function updateDocument(Document $doc, $url)
    {
        \Log::debug('[DescriptionScraper] Looking for description for ' . $doc->id . ' at ' . $url);

        // The dot is escaped so that only a literal "damm.no" is ignored;
        // unescaped it would also match e.g. "dammXno".
        if (preg_match('/damm\.no/', $url)) {
            \Log::debug('[DescriptionScraper] Ignoring URL: ' . $url);

            return;
        }

        try {
            $result = $this->scrape($url);
        } catch (TransferException $e) {
            \Log::error('[DescriptionScraper] Transfer failed: ' . $e->getMessage());
            $this->notify('*DescriptionScraper* failed to fetch: %s', $doc, [$url]);

            return;
        } catch (Scrapers\ScrapeException $e) {
            \Log::error('[DescriptionScraper] Scraping of ' . $url . ' failed: ' . $e->getMessage());
            $this->notify('*DescriptionScraper* / %s failed to find a text at: %s', $doc, [$e->getMessage(), $url]);

            return;
        }
        if (is_null($result)) {
            \Log::error('Encountered URL not recognized by any scraper: ' . $url);
            $this->notify('*DescriptionScraper* encountered URL not recognized by any scraper: %s', $doc, [$url]);

            return;
        }

        $doc->description = [
            'text'       => $result['text'],
            'source'     => $result['source'],
            'source_url' => $url,
        ];

        sleep($this->sleepTime);
    }
}


1			<?php
2
3			namespace Colligator;
4
5			use GuzzleHttp\Exception\TransferException;
6
7			class DescriptionScraper
8			{
9			public $doc;
10			public $url;
11			public $client;
12			protected $scrapers;
13			public $sleepTime = 7;
14
15			/**
16			* Create a new scraper.
17			*
18			* @param Client $client
19			*/
20			public function __construct()
21			{
22			$this->register([
23			Scrapers\BsScraper::class,
24			Scrapers\LocScraper::class,
25			Scrapers\FluxScraper::class,
26			Scrapers\UnivScraper::class,
27			]);
28			}
29
30			public function register($scrapers)
31			{
32			$this->scrapers = [];
33			foreach ($scrapers as $scraper) {
34			$this->scrapers[] = new $scraper(); // We could do dependency injection here
35			}
36			}
37
38			public function notify($msg, $doc, $args, $level = 'warning')
39			{
40			$msg = vsprintf($msg, $args);
41			$docLink = sprintf('<http://colligator.biblionaut.net/api/documents/%s|#%s> ', $doc->id, $doc->id);
42			\Slack::attach([
43			'fallback' => '#' . $doc->id . ' ' . $msg,
44			'text' => $docLink . $msg,
45			'color' => $level,
46			])->send();
47			}
48
49			public function scrape($url)
50			{
51			foreach ($this->scrapers as $scraper) {
52			if ($scraper->recognizes($url)) {
53			return $scraper->scrape($scraper->getCrawler($url));
54			}
55			}
56
57			return;
58			}
59
60			/**
61			* Execute the job.
62			*
63			* @param Document $doc
64			* @param string $url
65			*/
66			public function updateDocument(Document $doc, $url)
67			{
68			\Log::debug('[DescriptionScraper] Looking for decription for ' . $doc->id . ' at ' . $url);
69
70			if (preg_match('/(damm.no)/', $url)) {
71			\Log::debug('[DescriptionScraper] Ignoring URL: ' . $url);
72
73			return;
74			}
75
76			try {
77			$result = $this->scrape($url);
78			} catch (TransferException $e) {
79			\Log::error('[DescriptionScraper] Transfer failed: ' . $e->getMessage());
80			$this->notify('*DescriptionScraper* failed to fetch: %s', $doc, [$url]);
81
82			return;
83			} catch (Scrapers\ScrapeException $e) {
84			\Log::error('[DescriptionScraper] Scraping of ' . $url . ' failed: ' . $e->getMessage());
85			$this->notify('*DescriptionScraper* / %s failed to find a text at: %s', $doc, [$e->getMessage(), $url]);
86
87			return;
88			}
89			if (is_null($result)) {
90			\Log::error('Encountered URL not recognized by any scraper: ' . $url);
91			$this->notify('*DescriptionScraper* encountered URL not recognized by any sraper: %s', $doc, [$url]);
92
93			return;
94			}
95
96			$doc->description = [
97			'text' => $result['text'],
98			'source' => $result['source'],
99			'source_url' => $url,
100			];
101
102			sleep($this->sleepTime);
103			}
104			}
105

scriptotek / colligator-backend

DescriptionScraper A last analyzed 2018-08-07 10:50 UTC

Complexity

Size/Duplication

Coupling/Cohesion

Importance

5 Methods

Duplication Side-by-Side

Filter issues like

DescriptionScraper A
last analyzed 2018-08-07 10:50 UTC