GetNationalityCommand::mergeData()   A
last analyzed

Complexity

Conditions 3
Paths 2

Size

Total Lines 12
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 7
nc 2
nop 2
1
<?php
2
/*
3
 * This file is part of the Slince/China package.
4
 *
5
 * (c) Slince <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace China\Command;
12
13
use China\Nationality\Nationality;
14
use Symfony\Component\Console\Input\InputInterface;
15
use Symfony\Component\Console\Output\OutputInterface;
16
use Symfony\Component\Console\Style\SymfonyStyle;
17
use Symfony\Component\DomCrawler\Crawler;
18
19
/**
 * Console command "crawl:nationality".
 *
 * Fetches the page at {@see GetNationalityCommand::URL}, reads two of its tables
 * (name/pinyin pairs and population figures), merges them into Nationality objects
 * and dumps the result as JSON into the resource directory.
 */
class GetNationalityCommand extends CrawlCommand
{
    /**
     * Address of the page that is crawled.
     *
     * @var string
     */
    const URL = 'https://baike.baidu.com/item/56个民族/383735';

    /**
     * {@inheritdoc}
     */
    public function configure()
    {
        $this->setName('crawl:nationality');
        // The description is user-facing text (roughly: "collect nationality information from Baidu Baike").
        $this->setDescription('从百度百科采集民族信息');
    }

    /**
     * {@inheritdoc}
     *
     * Requests the page, extracts pinyin and population data, merges both and
     * writes the JSON-encoded result to "nationalities.json" under RESOURCE_DIR.
     *
     * @return int 0 once the file has been written
     */
    public function execute(InputInterface $input, OutputInterface $output)
    {
        $style = new SymfonyStyle($input, $output);
        $outputFile = static::RESOURCE_DIR.'/nationalities.json';

        $page = $this->getClient()->request('GET', static::URL);
        $tables = $page->filter('table[log-set-param="table_view"]');

        // eq() is zero-based: the second matching table is read for pinyin and the
        // third for population. NOTE(review): this depends on the remote page layout.
        $pinyinRows = $this->extractPinyinData($tables->eq(1));
        $populations = $this->extractPopulationData($tables->eq(2));

        $nationalities = $this->mergeData($pinyinRows, $populations);
        $this->filesystem->dumpFile(
            $outputFile,
            \GuzzleHttp\json_encode($nationalities, JSON_UNESCAPED_UNICODE)
        );

        $style->writeln(sprintf(
            '<info>Crawl completed, please check the file at "%s"</info>',
            realpath($outputFile)
        ));

        // Explicit success exit code; newer Symfony Console versions require an int here.
        return 0;
    }

    /**
     * Collects name/pinyin pairs from the rows of a table.
     *
     * Within each row the <td> cells are consumed pairwise: a cell at an even
     * position provides the name, the cell directly after it provides the pinyin.
     * A name without a following cell keeps 'pinyin' => false.
     *
     * @param Crawler $crawler node whose <tr> descendants are scanned
     *
     * @return array[] list of ['name' => string, 'pinyin' => string|false] entries
     */
    protected function extractPinyinData(Crawler $crawler)
    {
        $rows = $crawler->filter('tr')->each(function (Crawler $rowNode) {
            $pairs = [];
            $rowNode->filter('td')->each(function (Crawler $cellNode, $index) use (&$pairs) {
                $text = trim($cellNode->text());
                if ($index % 2 === 0) {
                    // Even cell: start a new entry keyed by the cell position.
                    $pairs[$index] = [
                        'name' => $text,
                        'pinyin' => false,
                    ];
                } else {
                    // Odd cell: completes the entry opened by the preceding cell.
                    $pairs[$index - 1]['pinyin'] = $text;
                }
            });

            return $pairs;
        });

        // Without any rows array_merge() would be called with no arguments, which
        // warns and yields null before PHP 7.4; return an empty list instead.
        if (count($rows) === 0) {
            return [];
        }

        // Flatten the per-row lists into one re-indexed list.
        return call_user_func_array('array_merge', $rows);
    }

    /**
     * Builds a lookup of population values keyed by the text of each row's first cell.
     *
     * Only rows with at least two <td> cells are used; the first cell is the key
     * and the second cell is the value. Rows with fewer cells (e.g. header rows
     * without <td>) are skipped.
     *
     * @param Crawler $crawler node whose <tr> descendants are scanned
     *
     * @return string[] trimmed second-cell text indexed by trimmed first-cell text
     */
    protected function extractPopulationData(Crawler $crawler)
    {
        $populations = [];
        $crawler->filter('tr')->each(function (Crawler $rowNode) use (&$populations) {
            $cells = $rowNode->filter('td');
            // Two cells are read below; text() on a missing second cell would throw.
            if (count($cells) < 2) {
                return;
            }

            $name = trim($cells->first()->text());
            $populations[$name] = trim($cells->eq(1)->text());
        });

        return $populations;
    }

    /**
     * Combines the name/pinyin entries with the population lookup.
     *
     * @param array[] $nationalityInfos entries with 'name' and 'pinyin' keys
     * @param array   $populations      population values indexed by name
     *
     * @return Nationality[] one object per entry, in input order; entries whose
     *                       name has no population value are given 0
     */
    protected function mergeData($nationalityInfos, $populations)
    {
        $nationalities = [];
        foreach ($nationalityInfos as $nationalityInfo) {
            $name = $nationalityInfo['name'];
            $population = isset($populations[$name]) ? $populations[$name] : 0;

            $nationalities[] = new Nationality($name, $nationalityInfo['pinyin'], $population);
        }

        return $nationalities;
    }
}