|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace Chemaclass\StockTicker\Domain\Crawler\Site\FinanceYahoo; |
|
6
|
|
|
|
|
7
|
|
|
use Chemaclass\StockTicker\Domain\Crawler\SiteCrawlerInterface; |
|
8
|
|
|
use Chemaclass\StockTicker\Domain\ReadModel\Site; |
|
9
|
|
|
use Chemaclass\StockTicker\Domain\ReadModel\Symbol; |
|
10
|
|
|
use Symfony\Contracts\HttpClient\HttpClientInterface; |
|
11
|
|
|
|
|
12
|
|
|
use function assert; |
|
13
|
|
|
use function get_class; |
|
14
|
|
|
use function is_int; |
|
15
|
|
|
|
|
16
|
|
|
/**
 * Crawls the Yahoo Finance quote page of a symbol and hands the `root.App.main`
 * JSON embedded in that page to every configured JSON extractor.
 *
 * @see "data/RootAppMainJsonExample.json" to see the structure of the `root.App.main` json.
 * @see https://jsoneditoronline.org/ to visualize and find what you are interested in.
 */
final class FinanceYahooSiteCrawler implements SiteCrawlerInterface
{
    private const REQUEST_METHOD = 'GET';

    private const REQUEST_URL = 'https://finance.yahoo.com/quote/%s';

    /** Captures, in the named group `json`, whatever is assigned to `root.App.main` in the page. */
    private const ROOT_APP_MAIN_PATTERN = '/root\.App\.main\ =\ (?<json>.*);/m';

    /** @var array<int|string,JsonExtractorInterface> */
    private array $jsonExtractors;

    /**
     * @param array<int|string,JsonExtractorInterface> $jsonExtractors
     *        Extractors to run on the decoded JSON. A string key becomes the key of the
     *        extracted value in the resulting Site data; for an integer key the
     *        extractor's class name is used instead.
     */
    public function __construct(array $jsonExtractors)
    {
        foreach ($jsonExtractors as $extractor) {
            assert($extractor instanceof JsonExtractorInterface);
        }

        $this->jsonExtractors = $jsonExtractors;
    }

    /**
     * Requests the quote page for the given symbol and builds a Site from the
     * values the extractors pull out of the embedded JSON.
     *
     * The resulting data always contains the key `symbol`, followed by one entry
     * per configured extractor.
     */
    public function crawl(HttpClientInterface $httpClient, Symbol $symbol): Site
    {
        $url = sprintf(self::REQUEST_URL, $symbol->toString());

        $html = $httpClient
            ->request(self::REQUEST_METHOD, $url)
            ->getContent();

        $json = $this->extractRootAppMainJson($html);

        $data = [
            'symbol' => $symbol->toString(),
        ];

        foreach ($this->jsonExtractors as $name => $extractor) {
            // Extractors registered without an explicit name are keyed by their class name.
            $name = is_int($name) ? get_class($extractor) : $name;
            // FIXME: $json['context']['dispatcher']['stores'] has the content encoded... and I don't know how to decode it now
            $data[$name] = $extractor->extractFromJson($json);
        }

        return new Site($data);
    }

    /**
     * Decodes the JSON assigned to `root.App.main` inside the given HTML.
     *
     * Returns an empty array when the assignment is not present in the page,
     * instead of reading an undefined match group (which would make json_decode()
     * throw a TypeError under strict types). JSON that cannot be decoded also ends
     * up as an empty array because of the array cast on the null result.
     *
     * @return array<int|string,mixed>
     */
    private function extractRootAppMainJson(string $html): array
    {
        if (preg_match(self::ROOT_APP_MAIN_PATTERN, $html, $matches) !== 1) {
            return [];
        }

        return (array) json_decode($matches['json'], true);
    }
}
|
62
|
|
|
|