1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Chemaclass\StockTicker\Domain\Crawler\Site\FinanceYahoo\JsonExtractor\StreamStore; |
6
|
|
|
|
7
|
|
|
use Chemaclass\StockTicker\Domain\Crawler\Site\FinanceYahoo\JsonExtractorInterface; |
8
|
|
|
use Chemaclass\StockTicker\Domain\Crawler\Site\Shared\NewsNormalizerInterface; |
9
|
|
|
use DateTimeImmutable; |
10
|
|
|
|
11
|
|
|
/**
 * Extracts news articles from the "StreamStore" section of a FinanceYahoo JSON payload.
 *
 * Only the first stream found in the payload is read. Its items are filtered
 * down to those whose type is "article", normalized into flat arrays, sorted
 * newest-first, and finally handed to the injected normalizer to be limited.
 */
final class News implements JsonExtractorInterface
{
    private const SOURCE = 'FinanceYahoo';
    private const TYPE_ARTICLE = 'article';

    /** Divisor used to turn the "pubtime" value (treated as milliseconds) into seconds. */
    private const MILLISECONDS_PER_SECOND = 1000;

    private NewsNormalizerInterface $newsNormalizer;

    public function __construct(NewsNormalizerInterface $newsNormalizer)
    {
        $this->newsNormalizer = $newsNormalizer;
    }

    /**
     * @param array $json decoded payload; the stream list is read from
     *                    context.dispatcher.stores.StreamStore.streams
     *
     * @return array the normalized articles, newest first, after being passed
     *               through the normalizer's limitByMaxToFetch()
     */
    public function extractFromJson(array $json): array
    {
        $streamItems = $this->extractStreamItems($json);

        $articles = $this->filterOnlyArticles($streamItems);
        $extractedInfo = $this->extractInfo($articles);
        $sorted = $this->sortNewestFirst($extractedInfo);

        return $this->newsNormalizer->limitByMaxToFetch($sorted);
    }

    /**
     * Returns the "stream_items" of the first stream, or an empty array when
     * the payload has no streams or the expected path is missing or malformed.
     */
    private function extractStreamItems(array $json): array
    {
        $streams = $json['context']['dispatcher']['stores']['StreamStore']['streams'] ?? [];

        // reset() yields false on an empty array, so guard before indexing into it.
        $first = is_array($streams) ? reset($streams) : false;
        if (!is_array($first)) {
            return [];
        }

        $streamItems = $first['data']['stream_items'] ?? [];

        return is_array($streamItems) ? $streamItems : [];
    }

    /**
     * Keeps only the items whose "type" equals "article".
     *
     * Original keys are preserved; items that are not arrays or carry no
     * "type" key are dropped.
     */
    private function filterOnlyArticles(array $items): array
    {
        return array_filter(
            $items,
            static fn ($item): bool => is_array($item)
                && ($item['type'] ?? null) === self::TYPE_ARTICLE,
        );
    }

    /**
     * Normalizes every article and re-indexes the result as a list.
     */
    private function extractInfo(array $articles): array
    {
        $normalizedArticles = array_map(
            fn (array $article): array => $this->normalizeArticle($article),
            $articles,
        );

        return array_values($normalizedArticles);
    }

    /**
     * Maps one raw article onto the flat structure returned by this extractor.
     *
     * Missing fields fall back to an empty string (or an empty array for
     * "images", or 0 for "pubtime").
     */
    private function normalizeArticle(array $article): array
    {
        $pubtime = $article['pubtime'] ?? 0;

        return [
            'source' => self::SOURCE,
            // Coerce here: under strict_types a float or numeric-string
            // "pubtime" would otherwise raise a TypeError at the int parameter.
            'datetime' => $this->normalizeDateTimeFromUnix(is_numeric($pubtime) ? (int) $pubtime : 0),
            'timezone' => $this->newsNormalizer->getTimeZoneName(),
            'url' => $article['url'] ?? '',
            'title' => $this->newsNormalizer->normalizeText($article['title'] ?? ''),
            'summary' => $this->newsNormalizer->normalizeText($article['summary'] ?? ''),
            'publisher' => $article['publisher'] ?? '',
            'images' => $article['images'] ?? [],
        ];
    }

    /**
     * Drops the last three decimal digits of $pubtime (i.e. treats it as a
     * millisecond timestamp), builds a date from the resulting Unix seconds
     * and lets the normalizer turn it into a string.
     */
    private function normalizeDateTimeFromUnix(int $pubtime): string
    {
        // intdiv() truncates toward zero, which matches cutting the last three digits.
        $unixTime = intdiv($pubtime, self::MILLISECONDS_PER_SECOND);
        $dateTime = new DateTimeImmutable("@{$unixTime}");

        return $this->newsNormalizer->normalizeDateTime($dateTime);
    }

    /**
     * Sorts the normalized articles by their "datetime" value, descending.
     *
     * NOTE(review): the comparison is done on the normalized datetime strings,
     * so the ordering presumably relies on the normalizer emitting a
     * lexicographically sortable format (e.g. "Y-m-d H:i:s") - confirm.
     */
    private function sortNewestFirst(array $articles): array
    {
        usort(
            $articles,
            static fn (array $a, array $b): int => $b['datetime'] <=> $a['datetime'],
        );

        return $articles;
    }
}
86
|
|
|
|