AbstractCrawler::parseVenue() - Code Metrics - Inspection of "[PATCH] Fixed user event crawler selectors" - core23/lastfm-php-api - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( df4839...0fa3d6 )

by Christian

created 2019-04-19 10:00 UTC

AbstractCrawler::parseVenue() A

↳ Parent: AbstractCrawler

Complexity

Conditions	2
Paths	2

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
dl	0
loc	18
rs	9.6666
c	0
b	0
f	0
cc	2
nc	2
nop	1

<?php

declare(strict_types=1);

/*
 * (c) Christian Gripp <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Core23\LastFm\Crawler;

use Core23\LastFm\Connection\ConnectionInterface;
use Core23\LastFm\Exception\CrawlException;
use Core23\LastFm\Model\Event;
use Core23\LastFm\Model\Image;
use Core23\LastFm\Model\Venue;
use Core23\LastFm\Model\VenueAddress;
use DateTime;
use Exception;
use Symfony\Component\DomCrawler\Crawler;

/**
 * Base class for crawlers: fetches pages through the injected connection and
 * provides shared helpers that turn DOM nodes into model objects and scalars.
 */
abstract class AbstractCrawler
{
    /**
     * Prefix put in front of root-relative URLs by parseUrl().
     */
    public const URL_PREFIX = 'http://last.fm';

    /**
     * Line separator that replaces paragraph ends and line breaks in multiline parseString() results.
     */
    public const NEWLINE = "\n";

    /**
     * @var ConnectionInterface
     */
    private $connection;

    /**
     * @param ConnectionInterface $connection connection used to download page bodies
     */
    public function __construct(ConnectionInterface $connection)
    {
        $this->connection = $connection;
    }

    /**
     * Crawls a URL.
     *
     * @param string $url
     *
     * @return Crawler|null a crawler over the page body, or null when the connection returned a falsy body
     */
    final protected function crawl(string $url): ?Crawler
    {
        $content = $this->connection->getPageBody($url);

        // Deliberate truthiness check: any falsy body is treated as "nothing to parse".
        if (!$content) {
            return null;
        }

        return new Crawler($content);
    }

    /**
     * Parses an event list item.
     *
     * @param Crawler       $node     the event list item node
     * @param DateTime|null $datetime event date to use; when null it is read from the item's "time" element
     *
     * @throws CrawlException when the event url, the event id or the event date cannot be read
     *
     * @return Event
     */
    final protected function parseEvent(Crawler $node, ?DateTime $datetime = null): Event
    {
        $eventNode = $node->filter('.events-list-item-event--title a');

        $url = $this->parseUrl($eventNode);

        if (null === $url) {
            throw new CrawlException('Error parsing event id.');
        }

        // The id is the digit run following the last "/<digit>" in the url.
        $id = (int) preg_replace('/.*\/(\d+)+.*/', '$1', $url);

        if (0 === $id) {
            throw new CrawlException('Error parsing event id.');
        }

        if (null === $datetime) {
            $timeNode = $node->filter('time');
            $rawDate  = 0 === $timeNode->count() ? null : $timeNode->attr('datetime');

            // A missing or blank attribute must never reach DateTime: a blank string
            // is parsed as "now" and would silently yield a wrong event date.
            if (null === $rawDate || '' === trim($rawDate)) {
                throw new CrawlException('Error reading event date');
            }

            try {
                $datetime = new DateTime($rawDate);
            } catch (Exception $exception) {
                throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
            }
        }

        $venue = $this->parseVenue($node->filter('.events-list-item-venue'));

        return new Event(
            $id,
            $this->parseString($eventNode) ?? '',
            $datetime,
            $url,
            $venue
        );
    }

    /**
     * Parses a venue node.
     *
     * @param Crawler $node
     *
     * @return Venue|null the venue with its city and country, or null when the node has no venue title
     */
    final protected function parseVenue(Crawler $node): ?Venue
    {
        $title   = $this->parseString($node->filter('.events-list-item-venue--title'));

        if (null === $title) {
            return null;
        }

        $city    = $this->parseString($node->filter('.events-list-item-venue--city'));
        $country = $this->parseString($node->filter('.events-list-item-venue--country'));

        return new Venue($title, null, null, new VenueAddress(
            null,
            null,
            $city,
            $country
        ));
    }

    /**
     * Parses a url node.
     *
     * @param Crawler $node
     * @param string  $attr name of the attribute holding the url
     *
     * @return string|null the url with a leading "/" replaced by URL_PREFIX, or null when the node list
     *                     is empty or the attribute value is falsy
     */
    final protected function parseUrl(Crawler $node, string $attr = 'href'): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $url = $node->attr($attr);

        if (!$url) {
            return null;
        }

        // Only root-relative urls are rewritten; everything else passes through unchanged.
        return preg_replace('/^\//', static::URL_PREFIX.'/', $url);
    }

    /**
     * Parses an image node.
     *
     * @param Crawler $node
     *
     * @return Image|null the image built from the node's "src" url, or null when no url could be read
     */
    final protected function parseImage(Crawler $node): ?Image
    {
        $src = $this->parseUrl($node, 'src');

        // parseUrl() yields either null or a non-empty string, so a strict null check is sufficient.
        if (null === $src) {
            return null;
        }

        return new Image($src);
    }

    /**
     * Parses a string node.
     *
     * The "content" attribute is preferred; without it the node text is used or,
     * in multiline mode, the inner html with paragraph ends and line breaks
     * converted to NEWLINE.
     *
     * @param Crawler $node
     * @param bool    $multiline
     *
     * @return string|null the trimmed, tag-free text, or null when the node list is empty
     */
    final protected function parseString(Crawler $node, bool $multiline = false): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $content = $node->attr('content');

        if (null === $content) {
            if ($multiline) {
                $content = $node->html();
                $content = (string) preg_replace('/<p[^>]*?>/', '', $content);
                $content = str_replace('</p>', static::NEWLINE, $content);
                $content = (string) preg_replace('/<br\s?\/?>/i', static::NEWLINE, $content);
            } else {
                $content = $node->text();
            }
        }

        return trim(strip_tags($content));
    }

    /**
     * Parses a date node.
     *
     * @param Crawler $node
     *
     * @throws Exception when the node content is not a date string DateTime can parse
     *
     * @return DateTime|null the parsed date, or null when the node list is empty
     */
    final protected function parseDate(Crawler $node): ?DateTime
    {
        $content = $this->parseString($node);

        if (null !== $content) {
            return new DateTime($content);
        }

        return null;
    }
}


1			<?php
2
3			declare(strict_types=1);
4
5			/*
6			* (c) Christian Gripp <[email protected]>
7			*
8			* For the full copyright and license information, please view the LICENSE
9			* file that was distributed with this source code.
10			*/
11
12			namespace Core23\LastFm\Crawler;
13
14			use Core23\LastFm\Connection\ConnectionInterface;
15			use Core23\LastFm\Exception\CrawlException;
16			use Core23\LastFm\Model\Event;
17			use Core23\LastFm\Model\Image;
18			use Core23\LastFm\Model\Venue;
19			use Core23\LastFm\Model\VenueAddress;
20			use DateTime;
21			use Exception;
22			use Symfony\Component\DomCrawler\Crawler;
23
24			abstract class AbstractCrawler
25			{
26			public const URL_PREFIX = 'http://last.fm';
27
28			public const NEWLINE = "\n";
29
30			/**
31			* @var ConnectionInterface
32			*/
33			private $connection;
34
35			/**
36			* @param ConnectionInterface $connection
37			*/
38			public function __construct(ConnectionInterface $connection)
39			{
40			$this->connection = $connection;
41			}
42
43			/**
44			* Crawles a url.
45			*
46			* @param string $url
47			*
48			* @return Crawler|null
49			*/
50			final protected function crawl(string $url): ?Crawler
51			{
52			if ($content = $this->connection->getPageBody($url)) {
53			return new Crawler($content);
54			}
55
56			return null;
57			}
58
59			/**
60			* @param Crawler $node
61			* @param DateTime $datetime
62			*
63			* @return Event
64			*/
65			final protected function parseEvent(Crawler $node, DateTime $datetime = null): Event
66			{
67			$eventNode = $node->filter('.events-list-item-event--title a');
68
69			$url = $this->parseUrl($eventNode);
70
71			if (null === $url) {
72			throw new CrawlException('Error parsing event id.');
73			}
74
75			$id = (int) preg_replace('/.*\/(\d+)+.*/', '$1', $url);
76
77			if (0 === $id) {
78			throw new CrawlException('Error parsing event id.');
79			}
80
81			if (null === $datetime) {
82			try {
83			$datetime = new DateTime($node->filter('time')->attr('datetime'));
84			} catch (Exception $exception) {
85			throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
86			}
87			}
88
89			$venue = $this->parseVenue($node->filter('.events-list-item-venue'));
90
91			return new Event(
92			$id,
93			$this->parseString($eventNode) ?? '',
94			$datetime,
95			$url,
96			$venue
97			);
98			}
99
100			/**
101			* @param Crawler $node
102			*
103			* @return Venue|null
104			*/
105			final protected function parseVenue(Crawler $node): ?Venue
106			{
107			$title = $this->parseString($node->filter('.events-list-item-venue--title'));
108
109			if (null === $title) {
110			return null;
111			}
112
113			$city = $this->parseString($node->filter('.events-list-item-venue--city'));
114			$country = $this->parseString($node->filter('.events-list-item-venue--country'));
115
116			return new Venue($title, null, null, new VenueAddress(
117			null,
118			null,
119			$city,
120			$country
121			));
122			}
123
124			/**
125			* Parses a url node.
126			*
127			* @param Crawler $node
128			* @param string $attr
129			*
130			* @return string|null
131			*/
132			final protected function parseUrl(Crawler $node, string $attr = 'href'): ?string
133			{
134			if (0 === $node->count()) {
135			return null;
136			}
137
138			if ($url = $node->attr($attr)) {
139			return preg_replace('/^\//', static::URL_PREFIX.'/', $url);
140			}
141
142			return null;
143			}
144
145			/**
146			* Parses an image node.
147			*
148			* @param Crawler $node
149			*
150			* @return Image|null
151			*/
152			final protected function parseImage(Crawler $node): ?Image
153			{
154			$src = $this->parseUrl($node, 'src');
155
156			if (!$src) {
			0 ignored issues – show Bug Best Practice introduced 2018-03-31 15:29 UTC by Report Bug Copy Issue Report The expression `$src` of type `null|string` is loosely compared to `false`; this is ambiguous if the string can be empty. You might want to explicitly use `=== null` instead. In PHP, under loose comparison (like `==`, or `!=`, or `switch` conditions), values of different types might be equal. For `string` values, the empty string `''` is a special case, in particular the following results might be unexpected: '' == false // true '' == null // true 'ab' == false // false 'ab' == null // false // It is often better to use strict comparison '' === false // false '' === null // false Loading history...
157			return null;
158			}
159
160			return new Image($src);
161			}
162
163			/**
164			* Parses a string node.
165			*
166			* @param Crawler $node
167			* @param bool $multiline
168			*
169			* @return string|null
170			*/
171			final protected function parseString(Crawler $node, bool $multiline = false): ?string
172			{
173			if (0 === $node->count()) {
174			return null;
175			}
176
177			$content = $node->attr('content');
178
179			if (null === $content) {
180			if ($multiline) {
181			$content = $node->html();
182			$content = (string) preg_replace('/<p[^>]*?>/', '', $content);
183			$content = str_replace('</p>', static::NEWLINE, $content);
184			$content = (string) preg_replace('/<br\s?\/?>/i', static::NEWLINE, $content);
185			} else {
186			$content = $node->text();
187			}
188			}
189
190			return trim(strip_tags($content));
191			}
192
193			/**
194			* Parses a date note.
195			*
196			* @param Crawler $node
197			*
198			* @return DateTime|null
199			*/
200			final protected function parseDate(Crawler $node): ?DateTime
201			{
202			$content = $this->parseString($node);
203
204			if (null !== $content) {
205			return new DateTime($content);
206			}
207
208			return null;
209			}
210			}
211

core23 / lastfm-php-api

GitHub Access Token became invalid

Push — master ( df4839...0fa3d6 )

AbstractCrawler::parseVenue() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like