Issues in AbstractCrawler.php - All Issues - Inspection of "[PATCH] Fixed wrong parameter type" - core23/lastfm-php-api - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( fc6845...7741ae )

by Christian

created 2019-04-29 17:25 UTC

src/Crawler/AbstractCrawler.php (1 issue)

Labels

Severity

Major 1

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

<?php

declare(strict_types=1);

/*
 * (c) Christian Gripp <[email protected]>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Core23\LastFm\Crawler;

use Core23\LastFm\Connection\ConnectionInterface;
use Core23\LastFm\Exception\CrawlException;
use Core23\LastFm\Model\Event;
use Core23\LastFm\Model\Image;
use Core23\LastFm\Model\Venue;
use Core23\LastFm\Model\VenueAddress;
use DateTime;
use Exception;
use Symfony\Component\DomCrawler\Crawler;

/**
 * Base class for crawlers that extract data from last.fm HTML pages.
 *
 * It fetches page bodies through the injected connection and offers helpers
 * that turn DOM nodes into strings, urls, dates, images, venues and events.
 */
abstract class AbstractCrawler
{
    /**
     * Prefix that is put in front of root-relative urls (those starting with "/").
     */
    public const URL_PREFIX = 'http://last.fm';

    /**
     * Line separator used when converting multiline HTML to plain text.
     */
    public const NEWLINE = "\n";

    /**
     * Connection used to fetch the raw page bodies.
     *
     * @var ConnectionInterface
     */
    private $connection;

    /**
     * @param ConnectionInterface $connection
     */
    public function __construct(ConnectionInterface $connection)
    {
        $this->connection = $connection;
    }

    /**
     * Crawls a url.
     *
     * @param string $url
     * @param array  $params
     *
     * @return Crawler|null a crawler for the page body, or null if the connection returned no (truthy) content
     */
    final protected function crawl(string $url, array $params = []): ?Crawler
    {
        if ($content = $this->connection->getPageBody($url, $params)) {
            return new Crawler($content);
        }

        return null;
    }

    /**
     * Parses an event list item.
     *
     * @param Crawler       $node     the event list item node
     * @param DateTime|null $datetime a known event date; if omitted it is read from the "datetime" attribute of the "time" element
     *
     * @throws CrawlException if the event url, the event id or the event date cannot be determined
     *
     * @return Event
     */
    final protected function parseEvent(Crawler $node, ?DateTime $datetime = null): Event
    {
        $eventNode = $node->filter('.events-list-item-event--title a');

        $url = $this->parseUrl($eventNode);

        if (null === $url) {
            throw new CrawlException('Error parsing event id.');
        }

        // When the pattern does not match, the url is left untouched; a url
        // that does not start with digits then casts to 0 and is rejected below.
        $id = (int) preg_replace('/.*\/(\d+)+.*/', '$1', $url);

        if (0 === $id) {
            throw new CrawlException('Error parsing event id.');
        }

        if (null === $datetime) {
            try {
                $rawDate = $node->filter('time')->attr('datetime');

                // A missing attribute would raise a TypeError under strict_types and an
                // empty string would silently resolve to "now", so both are rejected.
                if (null !== $rawDate && '' !== $rawDate) {
                    $datetime = new DateTime($rawDate);
                }
            } catch (Exception $exception) {
                throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
            }

            if (null === $datetime) {
                throw new CrawlException('Error reading event date');
            }
        }

        $venue = $this->parseVenue($node->filter('.events-list-item-venue'));

        return new Event(
            $id,
            $this->parseString($eventNode) ?? '',
            $datetime,
            $url,
            $venue
        );
    }

    /**
     * Parses a venue node.
     *
     * @param Crawler $node
     *
     * @return Venue|null the venue with its city and country, or null if no venue title was found
     */
    final protected function parseVenue(Crawler $node): ?Venue
    {
        $title = $this->parseString($node->filter('.events-list-item-venue--title'));

        if (null === $title) {
            return null;
        }

        $city    = $this->parseString($node->filter('.events-list-item-venue--city'));
        $country = $this->parseString($node->filter('.events-list-item-venue--country'));

        return new Venue($title, null, null, new VenueAddress(
            null,
            null,
            $city,
            $country
        ));
    }

    /**
     * Parses a url node.
     *
     * @param Crawler $node
     * @param string  $attr name of the attribute that holds the url
     *
     * @return string|null the url with a leading "/" replaced by the url prefix, or null if the node list is empty or the attribute has no (truthy) value
     */
    final protected function parseUrl(Crawler $node, string $attr = 'href'): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        if ($url = $node->attr($attr)) {
            // Only root-relative urls are rewritten; any other value passes through unchanged.
            return preg_replace('/^\//', static::URL_PREFIX.'/', $url);
        }

        return null;
    }

    /**
     * Parses an image node.
     *
     * @param Crawler $node
     *
     * @return Image|null the image, or null if the node has no usable "src" attribute
     */
    final protected function parseImage(Crawler $node): ?Image
    {
        $src = $this->parseUrl($node, 'src');

        // parseUrl signals "no url" with null, so compare strictly instead of by truthiness.
        if (null === $src) {
            return null;
        }

        return new Image($src);
    }

    /**
     * Parses a string node.
     *
     * The "content" attribute wins over the node body if it is present.
     *
     * @param Crawler $node
     * @param bool    $multiline if true, paragraph ends and line breaks of the node html are converted to newlines
     *
     * @return string|null the trimmed text without tags, or null if the node list is empty
     */
    final protected function parseString(Crawler $node, bool $multiline = false): ?string
    {
        if (0 === $node->count()) {
            return null;
        }

        $content = $node->attr('content');

        if (null === $content) {
            if ($multiline) {
                $content = $node->html();
                // Drop opening paragraph tags, then turn paragraph ends and <br> into newlines.
                $content = (string) preg_replace('/<p[^>]*?>/', '', $content);
                $content = str_replace('</p>', static::NEWLINE, $content);
                $content = (string) preg_replace('/<br\s?\/?>/i', static::NEWLINE, $content);
            } else {
                $content = $node->text();
            }
        }

        return trim(strip_tags($content));
    }

    /**
     * Parses a date node.
     *
     * @param Crawler $node
     *
     * @throws Exception if the node text is not a valid date string
     *
     * @return DateTime|null the parsed date, or null if the node list is empty or contains no text
     */
    final protected function parseDate(Crawler $node): ?DateTime
    {
        $content = $this->parseString($node);

        // An empty string would make DateTime fall back to the current time,
        // which is never the date of the crawled item.
        if (null === $content || '' === $content) {
            return null;
        }

        return new DateTime($content);
    }
}


GitHub Access Token became invalid

Push — master ( fc6845...7741ae )

src/Crawler/AbstractCrawler.php (1 issue)

Labels

Severity

Introduced By

Upgrade to new PHP Analysis Engine

1			<?php
2
3			declare(strict_types=1);
4
5			/*
6			* (c) Christian Gripp <[email protected]>
7			*
8			* For the full copyright and license information, please view the LICENSE
9			* file that was distributed with this source code.
10			*/
11
12			namespace Core23\LastFm\Crawler;
13
14			use Core23\LastFm\Connection\ConnectionInterface;
15			use Core23\LastFm\Exception\CrawlException;
16			use Core23\LastFm\Model\Event;
17			use Core23\LastFm\Model\Image;
18			use Core23\LastFm\Model\Venue;
19			use Core23\LastFm\Model\VenueAddress;
20			use DateTime;
21			use Exception;
22			use Symfony\Component\DomCrawler\Crawler;
23
24			abstract class AbstractCrawler
25			{
26			public const URL_PREFIX = 'http://last.fm';
27
28			public const NEWLINE = "\n";
29
30			/**
31			* @var ConnectionInterface
32			*/
33			private $connection;
34
35			/**
36			* @param ConnectionInterface $connection
37			*/
38			public function __construct(ConnectionInterface $connection)
39			{
40			$this->connection = $connection;
41			}
42
43			/**
44			* Crawles a url.
45			*
46			* @param string $url
47			* @param array $params
48			*
49			* @return Crawler\|null
50			*/
51			final protected function crawl(string $url, array $params = []): ?Crawler
52			{
53			if ($content = $this->connection->getPageBody($url, $params)) {
54			return new Crawler($content);
55			}
56
57			return null;
58			}
59
60			/**
61			* @param Crawler $node
62			* @param DateTime $datetime
63			*
64			* @return Event
65			*/
66			final protected function parseEvent(Crawler $node, DateTime $datetime = null): Event
67			{
68			$eventNode = $node->filter('.events-list-item-event--title a');
69
70			$url = $this->parseUrl($eventNode);
71
72			if (null === $url) {
73			throw new CrawlException('Error parsing event id.');
74			}
75
76			$id = (int) preg_replace('/.\/(\d+)+./', '$1', $url);
77
78			if (0 === $id) {
79			throw new CrawlException('Error parsing event id.');
80			}
81
82			if (null === $datetime) {
83			try {
84			$datetime = new DateTime($node->filter('time')->attr('datetime'));
85			} catch (Exception $exception) {
86			throw new CrawlException('Error reading event date', $exception->getCode(), $exception);
87			}
88			}
89
90			$venue = $this->parseVenue($node->filter('.events-list-item-venue'));
91
92			return new Event(
93			$id,
94			$this->parseString($eventNode) ?? '',
95			$datetime,
96			$url,
97			$venue
98			);
99			}
100
101			/**
102			* @param Crawler $node
103			*
104			* @return Venue\|null
105			*/
106			final protected function parseVenue(Crawler $node): ?Venue
107			{
108			$title = $this->parseString($node->filter('.events-list-item-venue--title'));
109
110			if (null === $title) {
111			return null;
112			}
113
114			$city = $this->parseString($node->filter('.events-list-item-venue--city'));
115			$country = $this->parseString($node->filter('.events-list-item-venue--country'));
116
117			return new Venue($title, null, null, new VenueAddress(
118			null,
119			null,
120			$city,
121			$country
122			));
123			}
124
125			/**
126			* Parses a url node.
127			*
128			* @param Crawler $node
129			* @param string $attr
130			*
131			* @return string\|null
132			*/
133			final protected function parseUrl(Crawler $node, string $attr = 'href'): ?string
134			{
135			if (0 === $node->count()) {
136			return null;
137			}
138
139			if ($url = $node->attr($attr)) {
140			return preg_replace('/^\//', static::URL_PREFIX.'/', $url);
141			}
142
143			return null;
144			}
145
146			/**
147			* Parses an image node.
148			*
149			* @param Crawler $node
150			*
151			* @return Image\|null
152			*/
153			final protected function parseImage(Crawler $node): ?Image
154			{
155			$src = $this->parseUrl($node, 'src');
156
157			if (!$src) {
			0 ignored issues – show Bug Best Practice introduced 2018-03-31 15:29 UTC by Report Bug Copy Issue Report Show Similar Issues like this The expression `$src` of type `null\|string` is loosely compared to `false`; this is ambiguous if the string can be empty. You might want to explicitly use `=== null` instead. In PHP, under loose comparison (like `==`, or `!=`, or `switch` conditions), values of different types might be equal. For `string` values, the empty string `''` is a special case, in particular the following results might be unexpected: '' == false // true '' == null // true 'ab' == false // false 'ab' == null // false // It is often better to use strict comparison '' === false // false '' === null // false Loading history...
158			return null;
159			}
160
161			return new Image($src);
162			}
163
164			/**
165			* Parses a string node.
166			*
167			* @param Crawler $node
168			* @param bool $multiline
169			*
170			* @return string\|null
171			*/
172			final protected function parseString(Crawler $node, bool $multiline = false): ?string
173			{
174			if (0 === $node->count()) {
175			return null;
176			}
177
178			$content = $node->attr('content');
179
180			if (null === $content) {
181			if ($multiline) {
182			$content = $node->html();
183			$content = (string) preg_replace('/<p[^>]*?>/', '', $content);
184			$content = str_replace('</p>', static::NEWLINE, $content);
185			$content = (string) preg_replace('/<br\s?\/?>/i', static::NEWLINE, $content);
186			} else {
187			$content = $node->text();
188			}
189			}
190
191			return trim(strip_tags($content));
192			}
193
194			/**
195			* Parses a date note.
196			*
197			* @param Crawler $node
198			*
199			* @return DateTime\|null
200			*/
201			final protected function parseDate(Crawler $node): ?DateTime
202			{
203			$content = $this->parseString($node);
204
205			if (null !== $content) {
206			return new DateTime($content);
207			}
208
209			return null;
210			}
211			}
212

core23 / lastfm-php-api

GitHub Access Token became invalid

Push — master ( fc6845...7741ae )

src/Crawler/AbstractCrawler.php (1 issue)

Labels

Severity

Introduced By

Upgrade to new PHP Analysis Engine

Duplication Side-by-Side

Filter issues like