|
1
|
|
|
<?php |
|
2
|
|
|
/* |
|
3
|
|
|
* This file is part of the Slince/China package. |
|
4
|
|
|
* |
|
5
|
|
|
* (c) Slince <[email protected]> |
|
6
|
|
|
* |
|
7
|
|
|
* For the full copyright and license information, please view the LICENSE |
|
8
|
|
|
* file that was distributed with this source code. |
|
9
|
|
|
*/ |
|
10
|
|
|
|
|
11
|
|
|
namespace China\Command; |
|
12
|
|
|
|
|
13
|
|
|
use China\Region\Location\AddressInterface; |
|
14
|
|
|
use China\Region\Location\Area; |
|
15
|
|
|
use China\Region\Location\City; |
|
16
|
|
|
use China\Region\Location\Province; |
|
17
|
|
|
use Symfony\Component\Console\Input\InputInterface; |
|
18
|
|
|
use Symfony\Component\Console\Output\OutputInterface; |
|
19
|
|
|
use Symfony\Component\Console\Style\SymfonyStyle; |
|
20
|
|
|
use Symfony\Component\DomCrawler\Crawler; |
|
21
|
|
|
|
|
22
|
|
|
class GetRegionCommand extends CrawlCommand
{
    /**
     * Address of the page the region list is crawled from.
     *
     * @var string
     */
    const URL = 'http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201703/t20170310_1471429.html';

    /**
     * {@inheritdoc}
     */
    public function configure()
    {
        $this->setName('crawl:region');
        $this->setDescription('从国家统计局采集地区信息');
    }

    /**
     * Crawls the region page, sorts the rows into provinces, cities and areas,
     * links them into a tree and dumps four JSON files below
     * RESOURCE_DIR/regions: provinces.json, cities.json, areas.json (flat
     * lists with children stripped) and regions.json (the nested tree).
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int Always 0; failures surface as exceptions thrown by the
     *             collaborators used here.
     */
    public function execute(InputInterface $input, OutputInterface $output)
    {
        $style = new SymfonyStyle($input, $output);

        $regionsDir = static::RESOURCE_DIR.'/regions';
        $outputFile = $regionsDir.'/regions.json';

        $crawler = $this->getClient()->request('GET', static::URL);

        // Every "p.MsoNormal" paragraph is read as one region row: the code is
        // taken from the span marked lang="EN-US", the name from the last
        // styled span of the paragraph.
        $regions = $crawler->filter('p.MsoNormal')->each(function (Crawler $node) {
            $code = $node->filter('span[lang="EN-US"]')->text();
            $name = $node->filter('span[style]')->last()->text();

            return [
                // Keep the digits only.
                'code' => preg_replace('/[^\d]/', '', $code),
                // NOTE(review): clearBlankCharacters() is defined outside this
                // class; presumably it strips whitespace - confirm.
                'name' => $this->clearBlankCharacters($name),
            ];
        });

        // Classify the rows.
        list($provinces, $cities, $areas) = $this->organizeRegions($regions);

        // Build the tree below an artificial root whose short code (0) matches
        // the parent code given to every province.
        $root = new Province(0, null);
        $root->shortCode = 0;
        $this->buildRegionsTree(array_merge($provinces, $cities, $areas), $root);

        // Target file => payload, written in this order. The flat lists are
        // cloned without children so only the last file carries the nesting.
        $dumps = [
            $regionsDir.'/provinces.json' => $this->extractAddressesWithoutChildren($provinces),
            $regionsDir.'/cities.json' => $this->extractAddressesWithoutChildren($cities),
            $regionsDir.'/areas.json' => $this->extractAddressesWithoutChildren($areas),
            $outputFile => $root->getChildren(),
        ];
        foreach ($dumps as $file => $payload) {
            $this->filesystem->dumpFile($file, \GuzzleHttp\json_encode($payload, JSON_UNESCAPED_UNICODE));
        }

        // realpath() returns false when it cannot resolve the path; fall back
        // to the unresolved path rather than printing an empty string.
        $style->writeln(sprintf(
            '<info>Crawl completed, please check the file at "%s"</info>',
            realpath($outputFile) ?: $outputFile
        ));

        // Explicit exit code: newer Symfony Console versions require an int.
        return 0;
    }

    /**
     * Returns copies of the given addresses with their children removed.
     *
     * The originals are left untouched: each address is cloned before its
     * children are reset to an empty list.
     *
     * @param AddressInterface[] $addresses
     *
     * @return AddressInterface[]
     */
    protected function extractAddressesWithoutChildren(array $addresses)
    {
        return array_map(function (AddressInterface $address) {
            $address = clone $address;
            $address->setChildren([]);

            return $address;
        }, $addresses);
    }

    /**
     * Sorts raw region rows into provinces, cities and areas.
     *
     * A row is classified by its code:
     *  - everything after the first two characters is "0000": province
     *    (parentCode 0, shortCode = first two characters);
     *  - otherwise, everything after the first four characters is "00": city
     *    (parentCode = first two characters, shortCode = first four);
     *  - anything else: area (parentCode = first four characters,
     *    shortCode = the full code).
     *
     * NOTE(review): parentCode and shortCode are assigned directly on the
     * Location objects; their declarations are not visible from this file, so
     * confirm those classes declare them (dynamic properties are deprecated
     * as of PHP 8.2).
     *
     * @param array $regions rows with the keys "code" and "name"
     *
     * @return array [Province[], City[], Area[]], in that order
     */
    protected function organizeRegions($regions)
    {
        $provinces = $cities = $areas = [];
        foreach ($regions as $regionData) {
            $code = $regionData['code'];
            $name = $regionData['name'];

            if (substr($code, 2) === '0000') {
                $province = new Province($code, $name);
                $province->parentCode = 0;
                $province->shortCode = substr($code, 0, 2);
                $provinces[] = $province;
            } elseif (substr($code, 4) === '00') {
                $city = new City($code, $name);
                $city->parentCode = substr($code, 0, 2);
                $city->shortCode = substr($code, 0, 4);
                $cities[] = $city;
            } else {
                $area = new Area($code, $name);
                $area->parentCode = substr($code, 0, 4);
                $area->shortCode = $code;
                $areas[] = $area;
            }
        }

        return [
            $provinces,
            $cities,
            $areas,
        ];
    }

    /**
     * Links the given addresses into a tree below $address.
     *
     * An address becomes a child of the node whose shortCode equals its
     * parentCode. Children keep the order they have in $addresses, and every
     * node reachable from $address gets setChildren() called on it (with an
     * empty list for leaves).
     *
     * The addresses are bucketed by parent code once and the tree is then
     * walked iteratively, instead of rescanning the whole list for every
     * node. Codes are compared as strings, so an empty or non-numeric code
     * never matches the root code 0 - a loose "==" would treat them as equal
     * on PHP 7 but not on PHP 8.
     *
     * @param AddressInterface[] $addresses candidate descendants
     * @param AddressInterface   $address   node the tree is built under
     */
    protected function buildRegionsTree($addresses, AddressInterface $address)
    {
        $childrenByParent = [];
        foreach ($addresses as $candidate) {
            $childrenByParent[(string) $candidate->parentCode][] = $candidate;
        }

        $pending = [$address];
        while (!empty($pending)) {
            $parent = array_pop($pending);
            $key = (string) $parent->shortCode;

            $children = [];
            if (isset($childrenByParent[$key])) {
                $children = $childrenByParent[$key];
                // Consume the bucket so every address is attached at most once;
                // this also rules out endless loops on self-referencing codes.
                unset($childrenByParent[$key]);
            }

            $parent->setChildren($children);
            foreach ($children as $child) {
                $pending[] = $child;
            }
        }
    }
}
An attempt to access an undefined property has been detected. This may be either a typographical error, or the property has been renamed while references to its old name remain.
If you really want to allow access to undefined properties, you can define magic methods to allow access. See the PHP core documentation on Overloading.