Passed
Push — master ( 204398...443e32 )
by Chema
01:50 queued 12s
created

News   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 90
Duplicated Lines 0 %

Test Coverage

Coverage 94.59%

Importance

Changes 0
Metric Value
wmc 10
eloc 41
dl 0
loc 90
ccs 35
cts 37
cp 0.9459
rs 10
c 0
b 0
f 0

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A innerHtml() 0 11 3
A crawlHtml() 0 11 1
A extractInfo() 0 14 1
A normalizeIncomingDate() 0 22 4
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Chemaclass\StockTicker\Domain\Crawler\Site\Barrons\HtmlCrawler;
6
7
use Chemaclass\StockTicker\Domain\Crawler\Site\Barrons\Exception\InvalidDateFormat;
8
use Chemaclass\StockTicker\Domain\Crawler\Site\Barrons\HtmlCrawlerInterface;
9
use Chemaclass\StockTicker\Domain\Crawler\Site\Shared\NewsNormalizerInterface;
10
use DateTimeImmutable;
11
use DOMNode;
12
use Symfony\Component\DomCrawler\Crawler;
13
14
final class News implements HtmlCrawlerInterface
15
{
16
    private const SOURCE = 'Barrons';
17
18
    /**
19
     * TODO: Refactor this logic to use regex instead... Something like this:
20
     * (?<month>\w{3}) (?<day>\d{1,2}), (?<year>\d{4}) ?(?<time>)
21
     *
22
     * @var array<int, string> the key is the length of the incoming date,
23
     *                         the value is the mask-format that we can apply to it.
24
     */
25
    private const DIFF_INCOMING_FORMATS = [
26
        11 => 'M d, Y',     // Dec 9, 2020
27
        12 => 'M d, Y',     // Dec 13, 2020
28
        17 => 'M d, Y H:i', // Dec 9, 2020 8:00
29
        18 => 'M d, Y H:i', // Dec 13, 2020 8:00
30
    ];
31
32
    private NewsNormalizerInterface $newsNormalizer;
33
34 4
    /**
     * Keeps the normalizer this crawler delegates to when limiting the result
     * list and when cleaning up dates, time zone names and titles.
     */
    public function __construct(NewsNormalizerInterface $newsNormalizer)
    {
        $this->newsNormalizer = $newsNormalizer;
    }
38
39 4
    /**
     * Collects one news entry per `<li>` found under `#barrons-news-infinite ul`.
     *
     * Each matching node is converted by extractInfo(); the resulting list is
     * handed to the normalizer's limitByMaxToFetch() and its result is returned.
     *
     * @param string $html markup to search for the news list
     */
    public function crawlHtml(string $html): array
    {
        $crawler = new Crawler($html);
        $entries = [];

        foreach ($crawler->filter('#barrons-news-infinite ul li') as $listItem) {
            $entries[] = $this->extractInfo($listItem);
        }

        return $this->newsNormalizer->limitByMaxToFetch($entries);
    }
51
52 2
    /**
     * Builds a single news entry from one list-item node.
     *
     * The node's inner markup has to start with `<span class="date">…</span>`
     * immediately followed by `<a href="…">…</a>`; the date, link target and
     * link text are taken from those three places.
     *
     * @return array<string, mixed> entry with the keys source, datetime, timezone, url and title
     *
     * @throws \UnexpectedValueException when the inner markup does not have the expected shape
     * @throws InvalidDateFormat         when the extracted date cannot be normalized
     */
    private function extractInfo(DOMNode $node): array
    {
        $markup = $this->innerHtml($node);

        $matched = preg_match(
            '/^<span class="date">(?<date>.+)<\/span><a href="(?<url>.+)">(?<title>.+)<\/a>/',
            $markup,
            $matches
        );

        // preg_match() yields 0 when nothing matched and false on a PCRE error;
        // either way $matches would lack the named groups read below.
        if (1 !== $matched) {
            throw new \UnexpectedValueException(
                sprintf('Unexpected news item markup for source "%s": %s', self::SOURCE, $markup)
            );
        }

        return [
            'source' => self::SOURCE,
            'datetime' => $this->normalizeIncomingDate($matches['date']),
            'timezone' => $this->newsNormalizer->getTimeZoneName(),
            'url' => $matches['url'],
            'title' => $this->newsNormalizer->normalizeText($matches['title']),
        ];
    }
68
69 2
    /**
     * Serializes the children of a node back into a markup string.
     *
     * Each child is rendered through its owner document's saveXML(); children
     * without an owner document are left out. The special-character entities
     * in the concatenated result are decoded before it is returned.
     */
    private function innerHtml(DOMNode $node): string
    {
        $fragments = [];

        foreach ($node->childNodes as $childNode) {
            $document = $childNode->ownerDocument;

            if (null === $document) {
                continue;
            }

            $fragments[] = $document->saveXML($childNode);
        }

        return htmlspecialchars_decode(implode('', $fragments));
    }
81
82 2
    /**
     * Turns the date text scraped from the page into the normalizer's date format.
     *
     * The text is trimmed; when it is 25 characters or longer its last 8
     * characters are dropped. The remaining length selects the parsing mask
     * from DIFF_INCOMING_FORMATS.
     *
     * NOTE(review): the dropped suffix is presumably a trailing meridiem/time
     * zone marker, which would mean am/pm information is lost — confirm
     * against the real feed.
     *
     * @throws InvalidDateFormat when no mask is registered for the length or the text does not fit the mask
     */
    private function normalizeIncomingDate(string $incomingDate): string
    {
        $incomingDate = trim($incomingDate);

        if (mb_strlen($incomingDate) >= 25) {
            $incomingDate = mb_substr($incomingDate, 0, -8);
        }

        $incomingFormat = self::DIFF_INCOMING_FORMATS[mb_strlen($incomingDate)] ?? null;

        if (null === $incomingFormat) {
            throw InvalidDateFormat::forIncomingDate($incomingDate);
        }

        // Without the leading "!" every field the mask does not parse is filled
        // with the current system time, so a date-only value such as
        // "Dec 9, 2020" would get a different time of day on every run.
        $dt = DateTimeImmutable::createFromFormat('!' . $incomingFormat, $incomingDate);

        if (false === $dt) {
            throw InvalidDateFormat::couldNotCreateDateTime($incomingDate, $incomingFormat);
        }

        return $this->newsNormalizer->normalizeDateTime($dt);
    }
105
}
106