GitHub Access Token became invalid

It seems that the GitHub access token used to retrieve details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

AbstractCrawler   A
last analyzed

Complexity

Total Complexity 21

Size/Duplication

Total Lines 154
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 7

Importance

Changes 0
Metric Value
wmc 21
lcom 2
cbo 7
dl 0
loc 154
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A crawl() 0 8 2
A parseEvent() 0 34 5
A parseVenue() 0 18 2
A parseUrl() 0 12 3
A parseImage() 0 10 2
A parseString() 0 21 4
A parseDate() 0 10 2
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * (c) Christian Gripp <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Core23\LastFm\Crawler;
13
14
use Core23\LastFm\Connection\ConnectionInterface;
15
use Core23\LastFm\Exception\CrawlException;
16
use Core23\LastFm\Model\Event;
17
use Core23\LastFm\Model\Image;
18
use Core23\LastFm\Model\Venue;
19
use Core23\LastFm\Model\VenueAddress;
20
use DateTime;
21
use Exception;
22
use Symfony\Component\DomCrawler\Crawler;
23
24
/**
 * Base class for crawlers that fetch a page through a connection and turn
 * DOM nodes into model objects (events, venues, images) or scalar values.
 */
abstract class AbstractCrawler
{
    /**
     * Prefix prepended to URLs that start with a slash.
     */
    public const URL_PREFIX = 'http://last.fm';

    /**
     * Line separator used when converting multiline markup to plain text.
     */
    public const NEWLINE = "\n";

    /**
     * @var ConnectionInterface
     */
    private $connection;

    public function __construct(ConnectionInterface $connection)
    {
        $this->connection = $connection;
    }

    /**
     * Crawls a url.
     *
     * @param string $url    Location passed to the connection
     * @param array  $params Additional parameters passed to the connection
     *
     * @return Crawler|null A crawler over the page body, or null when the connection returns no body
     */
    final protected function crawl(string $url, array $params = []): ?Crawler
    {
        $content = $this->connection->getPageBody($url, $params);

        if (null === $content) {
            return null;
        }

        return new Crawler($content);
    }

    /**
     * Parses an event list item.
     *
     * @param Crawler       $node     Node containing the event markup
     * @param DateTime|null $datetime Date of the event; when null it is read from
     *                                the "datetime" attribute of the node's "time" element
     *
     * @throws CrawlException When the event url, the event id or the event date cannot be read
     */
    final protected function parseEvent(Crawler $node, ?DateTime $datetime = null): Event
    {
        $eventNode = $node->filter('.events-list-item-event--title a');

        $url = $this->parseUrl($eventNode);

        if (null === $url) {
            throw new CrawlException('Error parsing event id.');
        }

        // The id is the digit run that follows the last "/<digit>" in the url.
        // When the pattern does not match, the url is returned unchanged and casts to 0.
        $id = (int) preg_replace('/.*\/(\d+)+.*/', '$1', $url);

        if (0 === $id) {
            throw new CrawlException('Error parsing event id.');
        }

        if (null === $datetime) {
            $datetime = $this->readEventDate($node);
        }

        $venue = $this->parseVenue($node->filter('.events-list-item-venue'));

        return new Event(
            $id,
            $this->parseString($eventNode) ?? '',
            $datetime,
            $url,
            $venue
        );
    }

    /**
     * Parses a venue node.
     *
     * @return Venue|null The venue, or null when the node has no venue title
     */
    final protected function parseVenue(Crawler $node): ?Venue
    {
        $title = $this->parseString($node->filter('.events-list-item-venue--title'));

        if (null === $title) {
            return null;
        }

        $city    = $this->parseString($node->filter('.events-list-item-venue--city'));
        $country = $this->parseString($node->filter('.events-list-item-venue--country'));

        return new Venue($title, null, null, new VenueAddress(
            null,
            null,
            $city,
            $country
        ));
    }

    /**
     * Parses a url node.
     *
     * @param Crawler $node Node carrying the url
     * @param string  $attr Name of the attribute that holds the url
     *
     * @return string|null The url with a leading slash replaced by URL_PREFIX followed by a slash,
     *                     or null when the node list is empty or the attribute is missing
     */
    final protected function parseUrl(Crawler $node, string $attr = 'href'): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $url = $node->attr($attr);

        if (null === $url) {
            return null;
        }

        return preg_replace('/^\//', static::URL_PREFIX.'/', $url);
    }

    /**
     * Parses an image node.
     *
     * @return Image|null The image built from the "src" attribute, or null when no url could be read
     */
    final protected function parseImage(Crawler $node): ?Image
    {
        $src = $this->parseUrl($node, 'src');

        if (null === $src) {
            return null;
        }

        return new Image($src);
    }

    /**
     * Parses a string node.
     *
     * The "content" attribute is preferred; without it the node's text is used.
     *
     * @param Crawler $node      Node to read
     * @param bool    $multiline When true, the node's html is used instead of its text and
     *                           closing paragraph tags and line breaks become NEWLINE
     *
     * @return string|null The trimmed, tag-free string, or null when the node list is empty
     */
    final protected function parseString(Crawler $node, bool $multiline = false): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $content = $node->attr('content');

        if (null === $content) {
            if ($multiline) {
                $content = $node->html();
                // Drop opening paragraph tags, then turn paragraph ends and <br> into newlines.
                $content = (string) preg_replace('/<p[^>]*?>/', '', $content);
                $content = str_replace('</p>', static::NEWLINE, $content);
                $content = (string) preg_replace('/<br\s?\/?>/i', static::NEWLINE, $content);
            } else {
                $content = $node->text();
            }
        }

        return trim(strip_tags($content));
    }

    /**
     * Parses a date node.
     *
     * @throws Exception When the node content is not a valid date string
     *
     * @return DateTime|null The parsed date, or null when the node list is empty or has no content
     */
    final protected function parseDate(Crawler $node): ?DateTime
    {
        $content = $this->parseString($node);

        // An empty string would make DateTime fall back to the current time,
        // so it is treated the same as a missing node.
        if (null === $content || '' === $content) {
            return null;
        }

        return new DateTime($content);
    }

    /**
     * Reads the event date from the "datetime" attribute of the node's "time" element.
     *
     * @throws CrawlException When the element or attribute is missing, empty or not a valid date
     */
    private function readEventDate(Crawler $node): DateTime
    {
        try {
            $value = $node->filter('time')->attr('datetime');
        } catch (Exception $exception) {
            throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
        }

        // A null value would raise a TypeError under strict types, and an empty value
        // would silently produce the current time; both are reported as crawl errors.
        if (null === $value || '' === trim($value)) {
            throw new CrawlException('Error reading event date');
        }

        try {
            return new DateTime($value);
        } catch (Exception $exception) {
            throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
        }
    }
}
178