1
|
|
|
<?php
|
2
|
|
|
namespace BOTK\Command;
|
3
|
|
|
|
4
|
|
|
use Symfony\Component\Console\Command\Command;
|
5
|
|
|
use Symfony\Component\Console\Input\InputInterface;
|
6
|
|
|
use Symfony\Component\Console\Input\InputOption;
|
7
|
|
|
use Symfony\Component\Console\Output\OutputInterface;
|
8
|
|
|
use Symfony\Component\Console\Question\Question;
|
9
|
|
|
use SKAgarwal\GoogleApi\PlacesApi;
|
10
|
|
|
use BOTK\FactsFactory;
|
11
|
|
|
|
12
|
|
|
/**
 * Console command "postman:reasoning".
 *
 * Reads CSV rows from STDIN (a search string and an optional URI to link),
 * queries the Google Places text-search API (and optionally the place-details
 * API) for each row, and prints the discovered data to STDOUT as Turtle.
 * Diagnostics are written through the console output as "#"-prefixed lines.
 */
class GoogleMapQueryCommand extends Command
{
    /**
     * Declares the command name, description, help text and options.
     */
    protected function configure()
    {
        $this
            ->setName('postman:reasoning')
            ->setDescription('Discover information about local business from its postal address.')
            ->setHelp(
                'This command mimics a postman reasoning in learning data from a string containing '
                .'a place name and/or a postal address (also incomplete) '."\n"
                .'It tries to googling the internet returning a ttl file according '
                .'botk Language profile. '."\n"
                .'As input it requires a csv like streams with two fields: a search string and '
                .'an uri to be linked (optional)'
            )
            ->addOption('namespace','U', InputOption::VALUE_REQUIRED,
                'the namespace for created Local Business URI',
                'http://linkeddata.center/resource/'
            )
            ->addOption('delay','d', InputOption::VALUE_REQUIRED,
                'delay each call of a fixed amount of seconds',
                0
            )
            ->addOption('skip','s', InputOption::VALUE_REQUIRED,
                'number of INPUT lines to skip',
                1
            )
            ->addOption('resilience','r', InputOption::VALUE_REQUIRED,
                'max number of errors tolerated before aborting',
                10
            )
            ->addOption('fields','f', InputOption::VALUE_REQUIRED,
                'detalis level required (none|contact)',
                'contact'
            )
            ->addOption('type','t', InputOption::VALUE_REQUIRED | InputOption::VALUE_IS_ARRAY,
                'additional RDF type as uri',
                array('http://schema.org/Place')
            )
            ->addOption('assert','a', InputOption::VALUE_REQUIRED,
                'asserted link predicate (sameAs|similarTo)',
                'similarTo'
            )
            ->addOption('limit','l', InputOption::VALUE_REQUIRED,
                'max number of calls to google APIs',
                4000
            )
            ->addOption('region', 'R', InputOption::VALUE_REQUIRED,
                'the two character country id (e.g. IT) for postman brain imprinting.',
                'IT'
            )
            ->addOption('provenance', 'P', InputOption::VALUE_REQUIRED,
                'add provenance info level',
                0
            )
            ->addOption('key','k', InputOption::VALUE_REQUIRED,
                'a valid google place api key (see https://developers.google.com/places/web-service/get-api-key)'
            );
    }


    /**
     * Runs the search / factualize loop over the CSV rows read from STDIN.
     *
     * @param InputInterface  $input  parsed command line options
     * @param OutputInterface $output console used for diagnostics
     *
     * @return int process exit code (0 on normal completion)
     *
     * @throws \Exception rethrows the last API exception once more than
     *                    "resilience" consecutive API calls have failed
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // cache input parameters
        $uriNameSpace        = $input->getOption('namespace');
        $limit               = $input->getOption('limit');
        $sleepTime           = $input->getOption('delay');
        $detailLevel         = $input->getOption('fields');
        $resilience          = $input->getOption('resilience');
        $types               = $input->getOption('type');
        $similarityPredicate = $input->getOption('assert');
        $provenance          = $input->getOption('provenance');
        $region              = $input->getOption('region');
        $skip                = $input->getOption('skip');
        $key                 = $input->getOption('key');

        // no key on the command line: ask for it interactively (hidden input)
        if (empty($key)) {
            $helper = $this->getHelper('question');
            $question = new Question('Please enter your google place api key: ');
            $question->setValidator(function ($value) {
                if (trim($value) == '') {
                    throw new \Exception('The key cannot be empty');
                }

                return $value;
            });
            $question->setHidden(true);
            $question->setMaxAttempts(20);

            $key = $helper->ask($input, $output, $question);
        }

        $googlePlaces = new PlacesApi($key);
        $factsFactory = new FactsFactory(array(
            'model' => 'LocalBusiness',
            'modelOptions' => array(
                // override the default lowercase filter for id because placeId is case sensitive
                'id' => array('filter' => FILTER_DEFAULT)
            )
        ));

        // print turtle prefixes
        echo $factsFactory->generateLinkedDataHeader();

        $lineCount = $callErrorCount = $consecutiveErrorsCount = $callCount = 0;

        // Defined up-front because it is read after the loop even when the
        // loop body never reached the assignment.
        $placeId = null;

        // skip input headers
        for ($i = 0; $i < $skip; $i++) {
            $lineCount++;
            $output->writeln("<info># Ignored header $lineCount: ". trim(fgets(STDIN)) . '</info>');
        }

        // main input loop
        while (($rawData = fgetcsv(STDIN)) && ($callCount < $limit)) {
            $lineCount++;

            // Start every row from a clean record: most fields below are only
            // written conditionally and must not leak from the previous row.
            $data = array();

            $query = isset($rawData[0]) ? $rawData[0] : null;
            $uri = isset($rawData[1]) ? \BOTK\Filters::FILTER_VALIDATE_URI($rawData[1]) : null;
            if (!$query) {
                $output->writeln("<error># Ignored invalid row at line $lineCount.</error>");
                continue;
            }

            //--------------------------------------------------------------------------------
            // call google place textSearch api, tolerating some errors.
            //--------------------------------------------------------------------------------
            try {
                $searchResultsCollection = $googlePlaces->textSearch($query, array('region' => $region));
                $consecutiveErrorsCount = 0;
                $callCount++;
            } catch (\Exception $e) {
                $consecutiveErrorsCount++;
                $callErrorCount++;
                if ($consecutiveErrorsCount > $resilience) {
                    throw $e;
                }
                $messageString = trim(preg_replace('/\s+/', ' ', $e->getMessage()));
                $output->writeln("<error># Ignored Search Api ERROR ($consecutiveErrorsCount): $messageString</error>");
                continue;
            }

            // skip empty results
            if ($googlePlaces->getStatus() === 'ZERO_RESULTS') {
                $output->writeln("<info># no results for '$query'.</info>");
                continue;
            }

            // factualize textSearch results (only the first hit is used)
            $result = $searchResultsCollection['results']->first();
            if (empty($result['place_id'])) {
                // Without a place id no URI can be minted for this row.
                $output->writeln("<info># no results for '$query'.</info>");
                continue;
            }

            $output->writeln("<info># discovered data for '$query'.</info>");

            $placeId = $result['place_id'];
            $placeUri = $uriNameSpace . $placeId;
            $data['id'] = $placeId;
            $data['uri'] = $placeUri;
            $data['businessType'] = $types;
            if ($uri) {
                $data[$similarityPredicate] = $uri;
            }

            if (isset($result['geometry']['location'])) {
                $data['lat'] = $result['geometry']['location']['lat'];
                $data['long'] = $result['geometry']['location']['lng'];
            }
            if (isset($result['formatted_address'])) {
                $data['addressDescription'] = $result['formatted_address'];
            }
            if (isset($result['name'])) {
                $data['businessName'] = $result['name'];
            }
            if (isset($result['types'])) {
                $data['disambiguatingDescription'] = $result['types'];
            }

            //--------------------------------------------------------------------------------
            // call google place details api, tolerating some errors.
            //--------------------------------------------------------------------------------
            if ($detailLevel === 'contact') {
                // Stays null when the call fails, so neither a stale status nor
                // the previous row's details are used below.
                $details = null;
                try {
                    $details = $googlePlaces->placeDetails($placeId, array('region' => $region));
                    $consecutiveErrorsCount = 0;
                    $callCount++;
                } catch (\Exception $e) {
                    $consecutiveErrorsCount++;
                    $callErrorCount++;
                    if ($consecutiveErrorsCount > $resilience) {
                        throw $e;
                    }
                    $messageString = trim(preg_replace('/\s+/', ' ', $e->getMessage()));
                    $output->writeln("<error># Ignored Details Api ERROR ($consecutiveErrorsCount): $messageString</error>");
                }

                // skip empty results
                if ($details !== null && 'OK' === $googlePlaces->getStatus()) {
                    // factualize placeDetails results
                    $result = $details['result'];

                    // NOTE(review): the mapping below is positional; it assumes a fixed
                    // ordering of address_components (0 = street number, 1 = route, ...).
                    // Confirm against the API response, or match on component 'types'.
                    $streetParts = array();
                    if (isset($result['address_components'][1]['short_name'])) {
                        $streetParts[] = $result['address_components'][1]['short_name'];
                    }
                    if (isset($result['address_components'][0]['short_name'])) {
                        $streetParts[] = $result['address_components'][0]['short_name'];
                    }
                    if (!empty($streetParts)) {
                        $data['streetAddress'] = implode(', ', $streetParts);
                    }
                    if (isset($result['address_components'][3]['short_name'])) {
                        $data['addressLocality'] = $result['address_components'][3]['short_name'];
                    }
                    // guard the same index that is read
                    if (isset($result['address_components'][4]['short_name'])) {
                        $data['addressRegion'] = $result['address_components'][4]['short_name'];
                    }
                    if (isset($result['address_components'][5]['short_name'])) {
                        $data['addressRegioneIstat'] = $result['address_components'][5]['short_name'];
                    }
                    if (isset($result['address_components'][7]['short_name'])) {
                        $data['postalCode'] = $result['address_components'][7]['short_name'];
                    }
                    if (isset($result['formatted_phone_number'])) {
                        $data['telephone'] = $result['formatted_phone_number'];
                    }
                    if (isset($result['website'])) {
                        $data['page'] = $result['website'];
                    }
                    if (isset($result['url'])) {
                        $data['hasMap'] = $result['url'];
                    }
                } else {
                    $output->writeln("<info># no details for place id '$placeId' details</info>");
                }
            }

            try {
                $facts = $factsFactory->factualize($data);
                echo $facts->asTurtleFragment(), "\n";
                $droppedFields = $facts->getDroppedFields();
                if (!empty($droppedFields)) {
                    $output->writeln("<error># Dropped ".implode(", ", $droppedFields).'</error>');
                    // use the local factory: the command has no factsFactory property
                    $factsFactory->addToCounter('error');
                }
                if ($provenance) {
                    $searchString = \BOTK\Filters::FILTER_SANITIZE_TURTLE_STRING($query);
                    $now = date('c');
                    echo "<$placeUri> prov:generatedAtTime \"$now\"^^xsd:dateTime; prov:wasDerivedFrom [prov:value \"$searchString\"].\n";
                }
            } catch (\BOTK\Exception\Warning $e) {
                $output->writeln("<comment># ".$e->getMessage().'</comment>');
            }

            // throttle between rows
            sleep($sleepTime);
        }

        if ($callCount >= $limit && $placeId) {
            $output->writeln("<comment># Api call limit reached ($callCount).</comment>");
        }

        // prints provenances and other metadata
        echo $factsFactory->generateLinkedDataFooter();
        $output->writeln("<info># Called $callCount APIs, $callErrorCount errors.</info>");

        // Explicit success exit code; newer Symfony Console versions require an int.
        return 0;
    }
}
Adding an explicit array definition is generally preferable to implicit array definition as it guarantees a stable state of the code.
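Let’s take a look at an example:

    foreach ($collection as $item) {
        $myArray['foo'] = $item->getFoo();

        if ($item->hasBar()) {
            $myArray['bar'] = $item->getBar();
        }

        // do something with $myArray
    }
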
As you can see in this example, the array $myArray is initialized the first time the foreach loop is entered. You can also see that the value of the `bar` key is only written conditionally; thus, its value might result from a previous iteration.

This might or might not be intended. To make your intention clear, to make your code more readable, and to avoid accidental bugs, we recommend adding an explicit initialization, $myArray = array(), either outside or inside the foreach loop.