LaravelNewsDataProvider::parseContent()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 14
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 8
nc 2
nop 1
dl 0
loc 14
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of laravel.su package.
4
 * For the full copyright and license information, please view the LICENSE
5
 * file that was distributed with this source code.
6
 */
7
declare(strict_types=1);
8
9
namespace App\Services\DataProviders;
10
11
use Carbon\Carbon;
12
use GuzzleHttp\Client;
13
use Illuminate\Support\Collection;
14
use Symfony\Component\DomCrawler\Crawler;
15
16
/**
17
 * Data provider that downloads the Laravel News RSS feed and turns its items into ExternalArticle objects.
18
 */
19
class LaravelNewsDataProvider implements DataProviderInterface
20
{
21
    private const CONTENT_NAMESPACE = 'http://purl.org/rss/1.0/modules/content/';
22
    private const FEED_URL = 'https://feed.laravel-news.com';
23
24
    /**
25
     * @var Client
26
     */
27
    private $client;
28
29
    /**
     * Builds the provider around the HTTP client it will use to request the feed.
     *
     * @param Client $client HTTP client stored for later feed requests.
     */
    public function __construct(Client $client)
    {
        $this->client = $client;
    }
37
38
    /**
     * Collects articles from the feed until one is too old compared to $latest.
     *
     * Articles are consumed in the order the feed yields them. Iteration stops
     * at the first article whose creation time plus one hour is earlier than
     * $latest; that article and everything after it are left out.
     *
     * @param  \DateTime                    $latest Moment the articles are compared against.
     * @return Collection|ExternalArticle[]
     * @throws \RuntimeException
     */
    public function getLatest(\DateTime $latest): Collection
    {
        $fresh = new Collection();

        foreach ($this->getArticles() as $candidate) {
            $createdPlusHour = Carbon::instance($candidate->getCreatedAt())->addHour(1);

            if ($createdPlusHour < $latest) {
                return $fresh;
            }

            $fresh->push($candidate);
        }

        return $fresh;
    }
57
58
    /**
     * Requests FEED_URL and lazily yields the articles parsed from the response body.
     *
     * @return \Generator|ExternalArticle[]
     * @throws \RuntimeException
     */
    private function getArticles(): \Generator
    {
        $feed = (string) $this->client->get(self::FEED_URL)->getBody();

        yield from $this->parseBody($feed);
    }
70
71
    /**
     * Turns a raw RSS document into a lazy stream of ExternalArticle objects.
     *
     * For every `rss > channel > item` node the full text is read from the
     * first element in the content namespace and the preview from
     * `description`. <img> tags found in the full text are removed from it and
     * attached to the article as image URLs.
     *
     * @param  string                       $body Raw feed document.
     * @return \Generator|ExternalArticle[]
     * @throws \RuntimeException
     */
    private function parseBody(string $body): \Generator
    {
        $parser = new Crawler($body);

        /** @var \DOMElement $node */
        foreach ($parser->filter('rss > channel > item') as $node) {
            // An item is not guaranteed to carry an element in the content
            // namespace; item(0) is null in that case, so fall back to an
            // empty text instead of dereferencing null.
            $encoded = $node->getElementsByTagNameNS(self::CONTENT_NAMESPACE, '*')->item(0);

            ['images' => $images, 'body' => $content] = $this->parseContent(
                $encoded !== null ? $encoded->textContent : ''
            );

            // Only the image-free text of the description is used for the preview.
            ['body' => $preview] = $this->parseContent(
                (string) $this->getContentOf($node, 'description')
            );

            $title = (string) $this->getContentOf($node, 'title');
            $link = (string) $this->getContentOf($node, 'link');
            $published = (string) $this->getContentOf($node, 'pubDate');

            $article = new ExternalArticle($title, trim($content), $link);

            $article->setPreview($preview);
            $article->setCreatedAt(Carbon::parse($published));

            foreach ($images as $image) {
                $article->addImageUrl($image);
            }

            yield $article;
        }
    }
110
111
    /**
     * Extracts image URLs from an HTML fragment and removes the <img> tags from it.
     *
     * Only double-quoted `src` attributes are recognised.
     *
     * @param  string $body HTML fragment to scan.
     * @return array  Map with 'images' (list of src values, in document order)
     *                and 'body' (the fragment without the matched <img> tags).
     */
    private function parseContent(string $body): array
    {
        $images = [];

        // Each part of the pattern is confined to a single tag ([^>]*) and a
        // single attribute value ([^"]*), so a match can neither run past the
        // closing ">" of the <img> nor demand extra characters after src="...".
        $pattern = '/<img\b[^>]*?\ssrc\s*=\s*"([^"]*)"[^>]*>/isu';

        $stripped = preg_replace_callback(
            $pattern,
            static function (array $match) use (&$images): string {
                $images[] = $match[1];

                return '';
            },
            $body
        );

        // preg_replace_callback() returns null on failure (for example on
        // malformed UTF-8 with the "u" modifier); keep the text untouched then.
        if ($stripped === null) {
            return ['images' => [], 'body' => $body];
        }

        return ['images' => $images, 'body' => $stripped];
    }
129
130
    /**
     * Returns the text content of the first descendant of $root named $tagName.
     *
     * @param  \DOMElement $root    Element whose descendants are searched.
     * @param  string      $tagName Tag name to look for.
     * @return null|string Text of the first matching element, or null when there is none.
     */
    private function getContentOf(\DOMElement $root, string $tagName): ?string
    {
        $first = $root->getElementsByTagName($tagName)->item(0);

        // A nullable return type still requires an explicit return value:
        // falling off the end of the function raises a TypeError.
        return $first !== null ? $first->textContent : null;
    }
143
}
144