|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace Chemaclass\StockTicker\Domain\Crawler\Site\FinanceYahoo\JsonExtractor\StreamStore; |
|
6
|
|
|
|
|
7
|
|
|
use Chemaclass\StockTicker\Domain\Crawler\Site\FinanceYahoo\JsonExtractorInterface; |
|
8
|
|
|
use Chemaclass\StockTicker\Domain\Crawler\Site\Shared\NewsNormalizerInterface; |
|
9
|
|
|
use DateTimeImmutable; |
|
10
|
|
|
|
|
11
|
|
|
/**
 * Extracts news articles from the `StreamStore` section of a FinanceYahoo JSON payload.
 *
 * Pipeline: read the stream items of the first stream, keep only the entries
 * whose type is "article", map each one to a flat array, sort the result
 * newest first and hand it to the injected normalizer to cap the amount.
 */
final class News implements JsonExtractorInterface
{
    private const SOURCE = 'FinanceYahoo';

    private const TYPE_ARTICLE = 'article';

    /** Divisor applied to `pubtime` before it is used as a unix timestamp. */
    private const MILLISECONDS_PER_SECOND = 1000;

    private NewsNormalizerInterface $newsNormalizer;

    public function __construct(NewsNormalizerInterface $newsNormalizer)
    {
        $this->newsNormalizer = $newsNormalizer;
    }

    /**
     * @param array $json decoded JSON; the items are read from
     *                    context.dispatcher.stores.StreamStore.streams.<first>.data.stream_items
     *
     * @return array the normalized articles, newest first, after being passed
     *               through NewsNormalizerInterface::limitByMaxToFetch()
     */
    public function extractFromJson(array $json): array
    {
        $streamItems = $this->extractStreamItems($json);

        $articles = $this->filterOnlyArticles($streamItems);
        $extractedInfo = $this->extractInfo($articles);
        $sorted = $this->sortNewestFirst($extractedInfo);

        return $this->newsNormalizer->limitByMaxToFetch($sorted);
    }

    /**
     * Returns the stream items of the first stream, or an empty list when the
     * expected structure is missing or empty.
     *
     * Guarding here avoids the warnings and the TypeError that an empty
     * `streams` array (reset() returns false) or a missing key would trigger.
     */
    private function extractStreamItems(array $json): array
    {
        $streams = $json['context']['dispatcher']['stores']['StreamStore']['streams'] ?? null;
        if (!is_array($streams) || $streams === []) {
            return [];
        }

        $first = reset($streams);
        if (!is_array($first)) {
            return [];
        }

        $items = $first['data']['stream_items'] ?? null;

        return is_array($items) ? $items : [];
    }

    /**
     * Keeps the items whose `type` equals self::TYPE_ARTICLE.
     *
     * Original keys are preserved (array_filter); extractInfo() reindexes later.
     * Items that are not arrays or that have no `type` are dropped silently.
     */
    private function filterOnlyArticles(array $items): array
    {
        return array_filter(
            $items,
            static fn ($item): bool => is_array($item)
                && ($item['type'] ?? null) === self::TYPE_ARTICLE,
        );
    }

    /**
     * Normalizes every article and returns them as a 0-indexed list.
     */
    private function extractInfo(array $articles): array
    {
        $normalizedArticles = array_map(
            fn (array $article): array => $this->normalizeArticle($article),
            $articles,
        );

        return array_values($normalizedArticles);
    }

    /**
     * Maps one raw article to the flat output structure.
     *
     * Missing fields fall back to an empty string, an empty list for `images`,
     * or 0 for `pubtime`.
     */
    private function normalizeArticle(array $article): array
    {
        return [
            'source' => self::SOURCE,
            'datetime' => $this->normalizeDateTimeFromUnix($article['pubtime'] ?? 0),
            'timezone' => $this->newsNormalizer->getTimeZoneName(),
            'url' => $article['url'] ?? '',
            'title' => $this->newsNormalizer->normalizeText($article['title'] ?? ''),
            'summary' => $this->newsNormalizer->normalizeText($article['summary'] ?? ''),
            'publisher' => $article['publisher'] ?? '',
            'images' => $article['images'] ?? [],
        ];
    }

    /**
     * Converts `pubtime` into the date-time string produced by the normalizer.
     *
     * The last three decimal digits are discarded before building the date,
     * so `pubtime` is presumably a timestamp in milliseconds - TODO confirm
     * against the feed. intdiv() truncates toward zero, which yields the same
     * value as cutting the last three characters of the decimal representation.
     */
    private function normalizeDateTimeFromUnix(int $pubtime): string
    {
        $unixTime = intdiv($pubtime, self::MILLISECONDS_PER_SECOND);
        $dateTime = new DateTimeImmutable("@{$unixTime}");

        return $this->newsNormalizer->normalizeDateTime($dateTime);
    }

    /**
     * Sorts the articles in descending order of their `datetime` value.
     *
     * NOTE(review): `datetime` is the string returned by the normalizer, so
     * the order is chronological only if that format sorts lexicographically
     * (e.g. "Y-m-d H:i:s") - confirm against the normalizer implementation.
     */
    private function sortNewestFirst(array $articles): array
    {
        usort(
            $articles,
            static fn (array $a, array $b): int => $b['datetime'] <=> $a['datetime'],
        );

        return $articles;
    }
}
|
86
|
|
|
|