GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( df4839...0fa3d6 )
by Christian
02:01
created

AbstractCrawler::parseVenue()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 18
rs 9.6666
c 0
b 0
f 0
cc 2
nc 2
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * (c) Christian Gripp <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Core23\LastFm\Crawler;
13
14
use Core23\LastFm\Connection\ConnectionInterface;
15
use Core23\LastFm\Exception\CrawlException;
16
use Core23\LastFm\Model\Event;
17
use Core23\LastFm\Model\Image;
18
use Core23\LastFm\Model\Venue;
19
use Core23\LastFm\Model\VenueAddress;
20
use DateTime;
21
use Exception;
22
use Symfony\Component\DomCrawler\Crawler;
23
24
abstract class AbstractCrawler
25
{
26
    /**
     * Scheme and host that parseUrl() puts in front of URLs starting with a slash.
     */
    public const URL_PREFIX = 'http://last.fm';

    /**
     * Line break that parseString() substitutes for closing paragraph tags and
     * line-break tags when called in multiline mode.
     */
    public const NEWLINE = "\n";

    /**
     * Connection that crawl() uses to fetch page bodies.
     *
     * @var ConnectionInterface
     */
    private $connection;
34
35
    /**
     * Stores the connection through which pages are fetched.
     *
     * @param ConnectionInterface $connection connection queried by crawl() for page bodies
     */
    public function __construct(ConnectionInterface $connection)
    {
        $this->connection = $connection;
    }
42
43
    /**
     * Fetches a page through the connection and wraps its body in a DOM crawler.
     *
     * @param string $url address handed to the connection
     *
     * @return Crawler|null crawler over the page body, or null when the connection returns a falsy body
     */
    final protected function crawl(string $url): ?Crawler
    {
        $body = $this->connection->getPageBody($url);

        if (!$body) {
            return null;
        }

        return new Crawler($body);
    }
58
59
    /**
     * Builds an Event from a node containing event markup.
     *
     * The event id is the number extracted from the URL of the title link, and the
     * venue is parsed from the ".events-list-item-venue" descendant.
     *
     * @param Crawler       $node     node containing the event markup
     * @param DateTime|null $datetime event date; when null, it is read from the "datetime"
     *                                attribute of the node's "time" element
     *
     * @throws CrawlException if the event link, the numeric event id or the event date cannot be read
     *
     * @return Event
     */
    final protected function parseEvent(Crawler $node, ?DateTime $datetime = null): Event
    {
        $eventNode = $node->filter('.events-list-item-event--title a');

        $url = $this->parseUrl($eventNode);

        if (null === $url) {
            throw new CrawlException('Error parsing event id.');
        }

        // When the pattern does not match, preg_replace() returns the URL unchanged;
        // a non-numeric URL then casts to 0 and is rejected below.
        $id = (int) preg_replace('/.*\/(\d+)+.*/', '$1', $url);

        if (0 === $id) {
            throw new CrawlException('Error parsing event id.');
        }

        if (null === $datetime) {
            try {
                $rawDate = $node->filter('time')->attr('datetime');

                // A missing attribute yields null. Passing null to DateTime under strict_types
                // raises a TypeError, which the catch below would not convert.
                if (null !== $rawDate) {
                    $datetime = new DateTime($rawDate);
                }
            } catch (Exception $exception) {
                throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
            }

            if (null === $datetime) {
                throw new CrawlException('Error reading event date');
            }
        }

        $venue = $this->parseVenue($node->filter('.events-list-item-venue'));

        return new Event(
            $id,
            $this->parseString($eventNode) ?? '',
            $datetime,
            $url,
            $venue
        );
    }
99
100
    /**
     * Builds a Venue from a venue node.
     *
     * Only the title, city and country are read from the markup; the remaining
     * Venue and VenueAddress constructor arguments are passed as null.
     *
     * @param Crawler $node node containing the venue markup
     *
     * @return Venue|null the venue, or null when no title could be parsed
     */
    final protected function parseVenue(Crawler $node): ?Venue
    {
        $name = $this->parseString($node->filter('.events-list-item-venue--title'));

        if (null === $name) {
            return null;
        }

        $address = new VenueAddress(
            null,
            null,
            $this->parseString($node->filter('.events-list-item-venue--city')),
            $this->parseString($node->filter('.events-list-item-venue--country'))
        );

        return new Venue($name, null, null, $address);
    }
123
124
    /**
     * Reads a URL from an attribute of a node.
     *
     * A value starting with a slash gets URL_PREFIX put in front of it; any other
     * value is returned as found.
     *
     * @param Crawler $node node carrying the attribute
     * @param string  $attr name of the attribute to read
     *
     * @return string|null the URL, or null when the node list is empty or the attribute value is falsy
     */
    final protected function parseUrl(Crawler $node, string $attr = 'href'): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $value = $node->attr($attr);

        if (!$value) {
            return null;
        }

        return preg_replace('/^\//', static::URL_PREFIX.'/', $value);
    }
144
145
    /**
     * Builds an Image from the "src" attribute of a node.
     *
     * @param Crawler $node node carrying the "src" attribute
     *
     * @return Image|null the image, or null when parseUrl() finds no usable "src" value
     */
    final protected function parseImage(Crawler $node): ?Image
    {
        $src = $this->parseUrl($node, 'src');

        // parseUrl() returns either null or a non-empty string, so a strict null
        // comparison is sufficient and avoids an ambiguous truthiness test.
        if (null === $src) {
            return null;
        }

        return new Image($src);
    }
162
163
    /**
     * Extracts trimmed, tag-free text from a node.
     *
     * The node's "content" attribute is used when it is present. Otherwise the
     * node text is used, or, in multiline mode, the node's HTML with opening
     * paragraph tags removed and closing paragraph tags and line-break tags
     * replaced by NEWLINE.
     *
     * @param Crawler $node      node to read from
     * @param bool    $multiline whether to keep paragraph and line breaks as NEWLINE
     *
     * @return string|null the extracted text, or null when the node list is empty
     */
    final protected function parseString(Crawler $node, bool $multiline = false): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $attribute = $node->attr('content');

        if (null !== $attribute) {
            return trim(strip_tags($attribute));
        }

        if (!$multiline) {
            return trim(strip_tags($node->text()));
        }

        $html = $node->html();
        $withoutOpeningTags = (string) preg_replace('/<p[^>]*?>/', '', $html);
        $withParagraphBreaks = str_replace('</p>', static::NEWLINE, $withoutOpeningTags);
        $withLineBreaks = (string) preg_replace('/<br\s?\/?>/i', static::NEWLINE, $withParagraphBreaks);

        return trim(strip_tags($withLineBreaks));
    }
192
193
    /**
     * Parses a date node.
     *
     * The node is read with parseString() and the resulting text is passed to the
     * DateTime constructor.
     *
     * @param Crawler $node node holding the date
     *
     * @throws \Exception if DateTime cannot parse the extracted text
     *
     * @return DateTime|null the parsed date, or null when parseString() returns null
     */
    final protected function parseDate(Crawler $node): ?DateTime
    {
        $value = $this->parseString($node);

        return null === $value ? null : new DateTime($value);
    }
210
}
211