1
|
|
|
<?php
|
2
|
|
|
namespace BOTK\Command;
|
3
|
|
|
|
4
|
|
|
use Symfony\Component\Console\Command\Command;
|
5
|
|
|
use Symfony\Component\Console\Input\InputInterface;
|
6
|
|
|
use Symfony\Component\Console\Input\InputOption;
|
7
|
|
|
use Symfony\Component\Console\Input\InputArgument;
|
8
|
|
|
use Symfony\Component\Console\Output\OutputInterface;
|
9
|
|
|
use Symfony\Component\Console\Question\Question;
|
10
|
|
|
use SKAgarwal\GoogleApi\PlacesApi;
|
11
|
|
|
use BOTK\FactsFactory;
|
12
|
|
|
|
13
|
|
|
/**
 * Console command "postman:reasoning".
 *
 * Reads CSV rows of the form `"query", uri`, sends each query to the Google
 * Places text-search API (optionally followed by a place-details call) and
 * prints the first hit as a Turtle fragment built by the injected FactsFactory.
 */
class PostmanReasoningCommand extends Command
{
    /** @var FactsFactory builds the Turtle header, fragments and footer */
    private $factsFactory;

    /**
     * @param FactsFactory|null $factsFactory factory to use; when omitted a
     *        'LocalBusiness' factory with an unfiltered 'id' field is created.
     */
    public function __construct(FactsFactory $factsFactory = null)
    {
        if (is_null($factsFactory)) {
            $factsFactory = new \BOTK\FactsFactory(array(
                'model' => 'LocalBusiness',
                'modelOptions' => ['id' => ['filter' => FILTER_DEFAULT]]
            ));
        }
        $this->factsFactory = $factsFactory;

        parent::__construct();
    }

    /**
     * Declares the command name, help text, the csvFile argument and all options.
     */
    protected function configure()
    {
        $this
            ->setName('postman:reasoning')
            ->setDescription('Discover information about local business from its postal address.')
            ->setHelp(
                'This command mimics a postman reasoning in learning data from a string containing '
                .'a place name and/or a postal address (also incomplete) '."\n"
                .'It tries to googling the internet returning a ttl file according '
                .'botk Language profile. '."\n"
                .'As input it requires a csv like streams with two fields: a search string and '
                .'an uri to be linked (optional)'
            )

            ->addArgument('csvFile', InputArgument::REQUIRED, 'A CSV file of records in the form: "query", uri . - means STDIN')

            ->addOption('namespace', 'U', InputOption::VALUE_REQUIRED,
                'the namespace for created Local Business URI',
                'http://linkeddata.center/resource/'
            )
            ->addOption('delay', 'd', InputOption::VALUE_REQUIRED,
                'delay each call of a fixed amount of seconds',
                0
            )
            ->addOption('skip', 's', InputOption::VALUE_REQUIRED,
                'number of INPUT lines to skip',
                1
            )
            ->addOption('resilience', 'r', InputOption::VALUE_REQUIRED,
                'max number of errors tolerated before aborting',
                10
            )
            ->addOption('fields', 'f', InputOption::VALUE_REQUIRED,
                'detail level required (none|contact)',
                'contact'
            )
            ->addOption('type', 't', InputOption::VALUE_REQUIRED | InputOption::VALUE_IS_ARRAY,
                'additional RDF type as uri',
                array('http://schema.org/Place')
            )
            ->addOption('assert', 'a', InputOption::VALUE_REQUIRED,
                'asserted link predicate (sameAs|similarTo)',
                'similarTo'
            )
            ->addOption('limit', 'l', InputOption::VALUE_REQUIRED,
                'max number of calls to google APIs',
                4000
            )
            ->addOption('region', 'R', InputOption::VALUE_REQUIRED,
                'the two character country id (e.g. IT) for postman artificial brain imprinting.',
                'IT'
            )
            ->addOption('provenance', 'P', InputOption::VALUE_REQUIRED,
                'add provenance info level',
                0
            )
            ->addOption('key', 'k', InputOption::VALUE_REQUIRED,
                'a valid google place api key (see https://developers.google.com/places/web-service/get-api-key)'
            );
    }

    /**
     * Runs the lookup over every CSV row and prints the resulting Turtle document.
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int always 0 when the run completes
     *
     * @throws \Exception when the CSV file cannot be opened, or when more than
     *         `resilience` consecutive API calls fail (the last API exception
     *         is rethrown unchanged).
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // cache input parameters
        $csvFile             = $input->getArgument('csvFile');
        $uriNameSpace        = $input->getOption('namespace');
        $limit               = (int) $input->getOption('limit');
        // sleep() rejects negative values, so clamp the delay at zero
        $sleepTime           = max(0, (int) $input->getOption('delay'));
        $detailLevel         = $input->getOption('fields');
        $resilience          = (int) $input->getOption('resilience');
        $types               = $input->getOption('type');
        $similarityPredicate = $input->getOption('assert');
        $provenance          = $input->getOption('provenance');
        $region              = $input->getOption('region');
        $skip                = (int) $input->getOption('skip');
        $key                 = $input->getOption('key');

        // ask google api key, if not passed as option
        if (empty($key)) {
            $key = $this->askForApiKey($input, $output);
        }

        $googlePlaces = new PlacesApi($key);

        // print turtle prefixes
        echo $this->factsFactory->generateLinkedDataHeader();

        $lineCount = $callErrorCount = $consecutiveErrorsCount = $callCount = 0;

        // Defined up front: it is read after the loop, which may never assign it.
        $placeId = null;

        // open csv stream
        $readsStdin = ($csvFile === '-');
        $csvStream = $readsStdin ? STDIN : fopen($csvFile, 'r');
        if (!$csvStream) {
            throw new \Exception("csvFile not found");
        }

        // skip input headers
        for ($i = 0; $i < $skip; $i++) {
            $lineCount++;
            $output->writeln("<info># Ignored header $lineCount: " . trim((string) fgets($csvStream)) . '</info>');
        }

        // The limit is tested first so that no row is consumed once it is reached.
        while (($callCount < $limit) && ($rawData = fgetcsv($csvStream))) {
            $lineCount++;

            $query = isset($rawData[0]) ? $rawData[0] : null;
            $uri = isset($rawData[1]) ? \BOTK\Filters::FILTER_VALIDATE_URI($rawData[1]) : null;
            if (!$query) {
                $output->writeln("<error># Ignored invalid row at line $lineCount.</error>");
                continue;
            }

            //--------------------------------------------------------------------------------
            // call google place textSearch api, tolerating some errors.
            //--------------------------------------------------------------------------------
            try {
                $searchResultsCollection = $googlePlaces->textSearch($query, array('region' => $region));
                $consecutiveErrorsCount = 0;
                $callCount++;
            } catch (\Exception $e) {
                $consecutiveErrorsCount++;
                $callErrorCount++;
                if ($consecutiveErrorsCount > $resilience) {
                    throw $e;
                }
                $messageString = trim(preg_replace('/\s+/', ' ', $e->getMessage()));
                $output->writeln("<error># Ignored Search Api ERROR ($consecutiveErrorsCount): $messageString</error>");
                continue;
            }

            // skip empty results
            if ($googlePlaces->getStatus() === 'ZERO_RESULTS') {
                $output->writeln("<info># no results for '$query'.</info>");
                continue;
            }

            // Only the first hit is used. A response without a usable first hit
            // is handled like an empty one instead of producing a fact with an
            // empty id.
            $result = isset($searchResultsCollection['results'])
                ? $searchResultsCollection['results']->first()
                : null;
            if (empty($result['place_id'])) {
                $output->writeln("<info># no results for '$query'.</info>");
                continue;
            }

            $output->writeln("<info># discovered data for '$query'.</info>");

            // factualize textSearch results
            $placeId = $result['place_id'];
            $placeUri = $uriNameSpace . $placeId;

            $data = array();
            $data['id'] = $placeId;
            $data['uri'] = $placeUri;
            $data['businessType'] = $types;
            if ($uri) {
                $data[$similarityPredicate] = $uri;
            }

            if (isset($result['geometry']['location'])) {
                $data['lat'] = $result['geometry']['location']['lat'];
                $data['long'] = $result['geometry']['location']['lng'];
            }
            if (isset($result['formatted_address'])) {
                $data['addressDescription'] = $result['formatted_address'];
            }
            if (isset($result['name'])) {
                $data['businessName'] = $result['name'];
            }
            if (isset($result['types'])) {
                $data['disambiguatingDescription'] = $result['types'];
            }

            //--------------------------------------------------------------------------------
            // call google place details api, tolerating some errors.
            //--------------------------------------------------------------------------------
            if ($detailLevel === 'contact') {
                // Reset per row: a failed call must not reuse the previous row's details.
                $details = null;
                try {
                    $details = $googlePlaces->placeDetails($placeId, array('region' => $region));
                    $consecutiveErrorsCount = 0;
                    $callCount++;
                } catch (\Exception $e) {
                    $consecutiveErrorsCount++;
                    $callErrorCount++;
                    if ($consecutiveErrorsCount > $resilience) {
                        throw $e;
                    }
                    $messageString = trim(preg_replace('/\s+/', ' ', $e->getMessage()));
                    $output->writeln("<error># Ignored Details Api ERROR ($consecutiveErrorsCount): $messageString</error>");
                }

                // A failed call was already reported above; the row is still
                // emitted with the text-search data only.
                if ($details !== null) {
                    if ('OK' === $googlePlaces->getStatus()) {
                        // factualize placeDetails results
                        $data = $this->addContactDetails($data, $details['result']);
                    } else {
                        $output->writeln("<info># no details for place id '$placeId' details</info>");
                    }
                }
            }

            try {
                $facts = $this->factsFactory->factualize($data);
                echo $facts->asTurtleFragment(), "\n";
                $droppedFields = $facts->getDroppedFields();
                if (!empty($droppedFields)) {
                    $output->writeln("<error># Dropped " . implode(", ", $droppedFields) . '</error>');
                    $this->factsFactory->addToCounter('error');
                }
                if ($provenance) {
                    $searchString = \BOTK\Filters::FILTER_SANITIZE_TURTLE_STRING($query);
                    $now = date('c');
                    echo "<$placeUri> prov:generatedAtTime \"$now\"^^xsd:dateTime; prov:wasDerivedFrom [prov:value \"$searchString\"].\n";
                }
            } catch (\BOTK\Exception\Warning $e) {
                $output->writeln("<comment># " . $e->getMessage() . '</comment>');
            }

            if ($sleepTime > 0) {
                sleep($sleepTime);
            }
        }

        // STDIN is not ours to close
        if (!$readsStdin) {
            fclose($csvStream);
        }

        if ($callCount >= $limit && $placeId) {
            $output->writeln("<comment># Api call limit reached ($callCount).</comment>");
        }

        // prints provenances and other metadata
        echo $this->factsFactory->generateLinkedDataFooter();
        $output->writeln("<info># Called $callCount APIs, $callErrorCount errors.</info>");

        // Explicit exit status; older Symfony Console versions treat a missing
        // return value as 0 as well.
        return 0;
    }

    /**
     * Interactively asks for the Google Places API key (hidden input, up to 20 attempts).
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return string the non-empty key typed by the user
     */
    private function askForApiKey(InputInterface $input, OutputInterface $output)
    {
        $question = new Question('Please enter your google place api key: ');
        $question->setValidator(function ($value) {
            if (trim((string) $value) === '') {
                throw new \Exception('The key cannot be empty');
            }

            return $value;
        });
        $question->setHidden(true);
        $question->setMaxAttempts(20);

        return $this->getHelper('question')->ask($input, $output, $question);
    }

    /**
     * Copies address and contact fields from a place-details result into $data.
     *
     * NOTE(review): address components are picked by fixed position, as in the
     * original code; this presumably matches the component order returned for
     * Italian street addresses - confirm against real responses, or select
     * components by their "types" instead.
     *
     * @param array $data  facts collected so far
     * @param mixed $place the 'result' entry of the place-details response
     *
     * @return array $data with the available fields added
     */
    private function addContactDetails(array $data, $place)
    {
        // Street address is "<component 1>, <component 0>"; either part may be
        // missing, so the string is assembled only from what is present.
        $streetParts = array();
        foreach (array(1, 0) as $index) {
            if (isset($place['address_components'][$index]['short_name'])) {
                $streetParts[] = $place['address_components'][$index]['short_name'];
            }
        }
        if (!empty($streetParts)) {
            $data['streetAddress'] = implode(', ', $streetParts);
        }

        // Each field is guarded by the same index it reads.
        $componentFields = array(
            3 => 'addressLocality',
            4 => 'addressRegion',
            5 => 'addressRegioneIstat',
            7 => 'postalCode',
        );
        foreach ($componentFields as $index => $field) {
            if (isset($place['address_components'][$index]['short_name'])) {
                $data[$field] = $place['address_components'][$index]['short_name'];
            }
        }

        $contactFields = array(
            'formatted_phone_number' => 'telephone',
            'website'                => 'page',
            'url'                    => 'hasMap',
        );
        foreach ($contactFields as $source => $field) {
            if (isset($place[$source])) {
                $data[$field] = $place[$source];
            }
        }

        return $data;
    }
}
If you define a variable conditionally, it can happen that it is not defined for all execution paths.
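Let’s take a look at an example:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;

            case 'bar':
                $x = 2;
                break;
        }

        // $x is potentially undefined here.
        echo $x;
    }

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.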
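Available Fixes
1. Check for existence of the variable explicitly, e.g. guard its use with `if (isset($x)) { echo $x; }`.
2. Define a default value for the variable, e.g. assign `$x = null;` before the switch statement.
3. Add a value for the missing path, e.g. add a `default:` case to the switch statement that assigns $x.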