1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace GeminiLabs\SiteReviews\Commands; |
4
|
|
|
|
5
|
|
|
use GeminiLabs\SiteReviews\Database; |
6
|
|
|
use GeminiLabs\SiteReviews\Database\Query; |
7
|
|
|
use GeminiLabs\SiteReviews\Defaults\StatDefaults; |
8
|
|
|
use GeminiLabs\SiteReviews\Geolocation; |
9
|
|
|
use GeminiLabs\SiteReviews\Helper; |
10
|
|
|
use GeminiLabs\SiteReviews\Modules\Notice; |
11
|
|
|
use GeminiLabs\SiteReviews\Modules\Queue; |
12
|
|
|
use GeminiLabs\SiteReviews\Response; |
13
|
|
|
use GeminiLabs\SiteReviews\Review; |
14
|
|
|
|
15
|
|
|
/**
 * Command that geolocates the IP addresses stored with reviews.
 *
 * A run is started with queue(), which schedules the first batch through the
 * plugin Queue. Each batch (process()) looks up one page of IP addresses that
 * have ratings without a "stats" row, stores the results in the stats table
 * and in the "_geolocation" post meta, and then schedules the following page.
 * A transient is used as a lock so only one run is active at a time.
 */
class GeolocateReviews extends AbstractCommand
{
    /**
     * IP-API batch requests allow a maximum of 100 IPs per request.
     */
    public const BATCH_SIZE = 100;

    /**
     * Number of rows per insert query.
     */
    public const INSERT_CHUNK_SIZE = 500;

    /**
     * Transient key for processing lock.
     */
    public const LOCK_KEY = 'glsr_geolocation_processing_lock';

    /**
     * Key used for the queued action.
     */
    public const QUEUED_ACTION_KEY = 'queue/geolocations';

    /**
     * Integer number of rows to fetch per database query in generator.
     */
    public const ROW_BATCH_SIZE = 500;

    /**
     * Run the command: queue the geolocation processing and add admin notices.
     */
    public function handle(): void
    {
        $this->queue(true);
    }

    /**
     * Process a batch of IPs for geolocation data.
     *
     * Fetches IPs, retrieves geolocation data, inserts stats, and updates post meta.
     * Presumably invoked by the queued action with the stored "offset" argument
     * (the hook wiring is not visible in this class).
     *
     * The page query only returns IPs that still lack stats, so every IP that
     * is resolved here drops out of that result set. The next offset therefore
     * only advances past the IPs of this batch that are STILL unresolved;
     * advancing by a full batch would skip IPs that were never looked up.
     *
     * @param int $offset Offset for IP query
     */
    public function process(int $offset = 0): void
    {
        $offset = max(0, $offset);
        $ipAddresses = $this->fetchIpsNeedingGeolocation($offset);
        if (empty($ipAddresses)) {
            // Nothing left at or after this offset: the run is finished.
            $this->releaseLock();
            return;
        }
        $response = $this->fetchRemoteGeolocationData($ipAddresses);
        $results = $response->body();
        if (empty($results)) {
            // The request itself produced nothing; abort the run instead of
            // repeating requests that are likely to fail as well.
            glsr_log()->warning("Geolocation: No geolocation data retrieved at offset {$offset}");
            $this->releaseLock();
            return;
        }
        $validResults = $this->filterValidGeolocationResults($results);
        if (empty($validResults)) {
            // Every IP in this page was rejected. Keep going so a page of
            // unresolvable IPs cannot block the pages that follow it.
            glsr_log()->warning("Geolocation: No valid geolocation results at offset {$offset}");
        } else {
            $this->processResults($validResults);
        }
        $unresolved = min(count($ipAddresses), $this->countUnresolvedIps($ipAddresses));
        $this->scheduleNextBatchIfNeeded($offset, static::BATCH_SIZE, $ipAddresses, 60, $offset + $unresolved);
    }

    /**
     * Geolocate a single review and store the result.
     *
     * Does nothing when the review is invalid, its IP address is local, the
     * lookup failed, or the lookup result is not a successful one.
     */
    public function processReview(Review $review): void
    {
        if (!$review->isValid()) {
            return;
        }
        if (Helper::isLocalIpAddress($review->ip_address)) {
            return;
        }
        $response = glsr(Geolocation::class)->lookup($review->ip_address);
        if ($response->failed()) {
            return;
        }
        // array_filter() preserves keys, so a valid single result stays at index 0.
        $results = $this->filterValidGeolocationResults([$response->body()]);
        if (empty($results[0])) {
            return;
        }
        $result = glsr(StatDefaults::class)->restrict(
            wp_parse_args(['rating_id' => $review->rating_id], $results[0])
        );
        glsr(Database::class)->insert('stats', $result);
        // The post meta stores the same values without the rating_id.
        update_post_meta($review->ID, '_geolocation', array_diff_key($result, ['rating_id' => 0]));
    }

    /**
     * Start processing via WP-Cron.
     *
     * @param bool $notify Whether to add admin notices describing the outcome
     *
     * @return bool True when processing was queued, false when a run is
     *              already in progress or there is nothing to process
     */
    public function queue(bool $notify = false): bool
    {
        if (!glsr(Queue::class)->isPending(static::QUEUED_ACTION_KEY)) {
            // No batch is pending, so any existing lock is stale.
            $this->releaseLock();
        }
        if (get_transient(static::LOCK_KEY)) { // Prevent concurrent processing
            if ($notify) {
                glsr(Notice::class)->addWarning(
                    _x('Geolocation processing is already in progress.', 'admin-text', 'site-reviews')
                );
            }
            return false;
        }
        $ipsToProcess = $this->countIpsNeedingGeolocation();
        if (0 === $ipsToProcess) {
            if ($notify) {
                glsr(Notice::class)->addInfo(
                    _x('All valid IP addresses have already been geolocated.', 'admin-text', 'site-reviews')
                );
            }
            return false;
        }
        $this->lock();
        glsr(Queue::class)->once(time(), static::QUEUED_ACTION_KEY, ['offset' => 0], true);
        if ($notify) {
            glsr(Notice::class)->addSuccess(sprintf(
                _x('Successfully queued geolocation processing of %d IP addresses.', 'admin-text', 'site-reviews'),
                $ipsToProcess
            ));
        }
        return true;
    }

    /**
     * @return array The notices collected while the command ran
     */
    public function response(): array
    {
        return [
            'notices' => glsr(Notice::class)->get(),
        ];
    }

    /**
     * Count the distinct IP addresses that have at least one rating without stats.
     *
     * Distinct addresses are counted (not rating rows) so the number matches
     * what fetchIpsNeedingGeolocation() pages through and what the admin
     * notice reports as "IP addresses".
     */
    protected function countIpsNeedingGeolocation(): int
    {
        $sql = "
            SELECT COUNT(DISTINCT r.ip_address)
            FROM table|ratings AS r
            LEFT JOIN table|stats AS s ON (r.ID = s.rating_id)
            WHERE 1=1
            AND r.ip_address IS NOT NULL
            AND r.ip_address != ''
            AND r.ip_address != '127.0.0.1'
            AND r.ip_address != 'unknown'
            AND s.rating_id IS NULL
        ";
        $query = glsr(Query::class)->sql($sql);
        return (int) glsr(Database::class)->dbGetVar($query);
    }

    /**
     * Count how many of the given IP addresses still have a rating without stats.
     *
     * @param string[] $ipAddresses IPs of the batch that was just processed
     */
    protected function countUnresolvedIps(array $ipAddresses): int
    {
        $ipAddresses = array_values($ipAddresses);
        if (empty($ipAddresses)) {
            return 0;
        }
        $placeholders = implode(',', array_fill(0, count($ipAddresses), '%s'));
        $sql = "
            SELECT COUNT(DISTINCT r.ip_address)
            FROM table|ratings AS r
            LEFT JOIN table|stats AS s ON (r.ID = s.rating_id)
            WHERE r.ip_address IN ({$placeholders})
            AND s.rating_id IS NULL
        ";
        $query = glsr(Query::class)->sql($sql, $ipAddresses);
        return (int) glsr(Database::class)->dbGetVar($query);
    }

    /**
     * Fetch one page of distinct IP addresses that still need geolocation.
     *
     * The rows are ordered so that LIMIT/OFFSET paging is deterministic;
     * without ORDER BY the database may return pages in any order.
     *
     * @param int $offset Offset for pagination
     *
     * @return string[]
     */
    protected function fetchIpsNeedingGeolocation(int $offset): array
    {
        $sql = "
            SELECT DISTINCT r.ip_address
            FROM table|ratings AS r
            LEFT JOIN table|stats AS s ON (r.ID = s.rating_id)
            WHERE 1=1
            AND r.ip_address IS NOT NULL
            AND r.ip_address != ''
            AND r.ip_address != '127.0.0.1'
            AND r.ip_address != 'unknown'
            AND s.rating_id IS NULL
            ORDER BY r.ip_address ASC
            LIMIT %d OFFSET %d
        ";
        $query = glsr(Query::class)->sql($sql, static::BATCH_SIZE, $offset);
        return glsr(Database::class)->dbGetCol($query);
    }

    /**
     * Request geolocation data for a batch of IPs and honour the rate limit.
     *
     * Sleeps for the reset time (at least 60 seconds) when the response says
     * no requests remain ("x-rl" header is 0) or when the status code is 429.
     * A response without the "x-rl" header is NOT treated as rate limited.
     *
     * @param string[] $ipAddresses IPs to fetch data for
     */
    protected function fetchRemoteGeolocationData(array $ipAddresses): Response
    {
        $response = glsr(Geolocation::class)->batchLookup($ipAddresses);
        $headers = $response->headers;
        // null means "header not present", which must not be confused with 0.
        $remainingRequests = isset($headers['x-rl']) ? (int) $headers['x-rl'] : null;
        $resetTime = max((int) ($headers['x-ttl'] ?? 0), 60); // Min 60 seconds
        if (0 === $remainingRequests) {
            glsr_log()->warning("Geolocation: Rate limit reached, waiting {$resetTime} seconds");
            sleep($resetTime);
        } elseif (422 === $response->code) {
            glsr_log()->error('Geolocation: 422 Unprocessable Entity, invalid batch request');
        } elseif (429 === $response->code) {
            glsr_log()->warning("Geolocation: 429 Too Many Requests, waiting {$resetTime} seconds");
            sleep($resetTime);
        }
        return $response;
    }

    /**
     * Keep only the results whose status is "success" and whose "query" is not empty.
     *
     * Array keys are preserved.
     *
     * @param array $results Geolocation API results
     */
    protected function filterValidGeolocationResults(array $results): array
    {
        return array_filter($results, static function ($result) {
            $query = $result['query'] ?? '';
            $status = $result['status'] ?? '';
            return 'success' === $status && !empty($query);
        });
    }

    /**
     * Insert one chunk of prepared rows into the stats and postmeta tables.
     *
     * @param array    $stats       Rows for the stats table
     * @param string[] $statsCol    Column names of the stats rows
     * @param array    $postmeta    Rows for the postmeta table
     * @param string[] $postmetaCol Column names of the postmeta rows
     */
    protected function insertChunk(array $stats, array $statsCol, array $postmeta, array $postmetaCol): void
    {
        if (empty($stats) || empty($postmeta)) {
            return;
        }
        glsr(Database::class)->insertBulk('stats', $stats, $statsCol);
        glsr(Database::class)->insertBulk('postmeta', $postmeta, $postmetaCol);
    }

    /**
     * Set the processing lock.
     *
     * @param int $duration Lifetime of the lock transient in seconds
     */
    protected function lock(int $duration = \HOUR_IN_SECONDS): void
    {
        set_transient(static::LOCK_KEY, true, $duration);
    }

    /**
     * Combine each rating row with the geolocation result of its IP address
     * and insert the rows in chunks of INSERT_CHUNK_SIZE.
     *
     * Rating rows whose IP address has no matching result are skipped.
     *
     * @param \Generator $generator Generator yielding ratings data
     * @param array      $results   Valid geolocation results
     */
    protected function prepareAndInsert(\Generator $generator, array $results): void
    {
        // Index the results once instead of scanning them for every rating row.
        // Keys are lower-cased so a difference in letter case (IPv6) still matches.
        $resultsByIp = [];
        foreach ($results as $result) {
            $key = strtolower((string) ($result['query'] ?? ''));
            if ('' !== $key && !isset($resultsByIp[$key])) {
                $resultsByIp[$key] = $result;
            }
        }
        $postmetaCol = [
            'post_id', 'meta_key', 'meta_value',
        ];
        $statsCol = array_keys(glsr(StatDefaults::class)->defaults());
        $data = [];
        $postmeta = [];
        foreach ($generator as $item) {
            $key = strtolower((string) ($item['ip_address'] ?? ''));
            if (!isset($resultsByIp[$key])) {
                continue;
            }
            $result = glsr(StatDefaults::class)->restrict(
                wp_parse_args($item, $resultsByIp[$key])
            );
            $data[] = $result;
            $postmeta[] = [
                'post_id' => $item['review_id'],
                'meta_key' => '_geolocation',
                'meta_value' => maybe_serialize(array_diff_key($result, ['rating_id' => 0])),
            ];
            if (count($data) >= static::INSERT_CHUNK_SIZE) {
                $this->insertChunk($data, $statsCol, $postmeta, $postmetaCol);
                $postmeta = [];
                $data = [];
            }
        }
        // Flush whatever is left after the last full chunk.
        $this->insertChunk($data, $statsCol, $postmeta, $postmetaCol);
    }

    /**
     * Store the valid geolocation results for the ratings that use those IPs.
     *
     * @param array $results Valid geolocation results
     */
    protected function processResults(array $results): void
    {
        $validIps = array_values(array_unique(wp_list_pluck($results, 'query')));
        $generator = $this->resultsGenerator($validIps);
        $this->prepareAndInsert($generator, $results);
    }

    /**
     * Release the processing lock.
     */
    protected function releaseLock(): void
    {
        delete_transient(static::LOCK_KEY);
    }

    /**
     * Generator to yield ratings data for a list of IP addresses.
     *
     * Only ratings that do not have a stats row yet are yielded, so ratings
     * that were already geolocated are not inserted a second time.
     *
     * Rows are paged by rating ID ("ID greater than the last one seen")
     * instead of by OFFSET: the consumer inserts stats rows while this
     * generator is still running, which shrinks the result set, and an
     * OFFSET into a shrinking set would skip rows.
     *
     * @param string[] $ipAddresses List of IPs to query
     */
    protected function resultsGenerator(array $ipAddresses): \Generator
    {
        foreach (array_chunk($ipAddresses, static::BATCH_SIZE) as $chunk) {
            $placeholders = implode(',', array_fill(0, count($chunk), '%s'));
            $sql = "
                SELECT r.ip_address, r.ID AS rating_id, r.review_id
                FROM table|ratings AS r
                LEFT JOIN table|stats AS s ON (r.ID = s.rating_id)
                WHERE r.ip_address IN ({$placeholders})
                AND s.rating_id IS NULL
                AND r.ID > %d
                ORDER BY r.ID ASC
                LIMIT %d
            ";
            $lastId = 0;
            do {
                $query = glsr(Query::class)->sql($sql, array_merge($chunk, [$lastId, static::ROW_BATCH_SIZE]));
                $rows = glsr(Database::class)->dbGetResults($query, \ARRAY_A);
                $rows = is_array($rows) ? $rows : [];
                foreach ($rows as $row) {
                    $lastId = (int) $row['rating_id'];
                    yield $row;
                }
                // A short page means there is nothing more to fetch for this chunk.
                $hasMore = count($rows) === static::ROW_BATCH_SIZE;
                unset($rows); // Free memory
            } while ($hasMore);
        }
    }

    /**
     * Schedule the next batch of IPs or release the lock if no more IPs remain.
     *
     * A batch that is smaller than $batchSize is taken to be the last one.
     *
     * @param int      $offset      Current offset
     * @param int      $batchSize   Size of the current batch
     * @param array    $ipAddresses Current batch of IPs
     * @param int      $delay       Seconds to wait before the next batch runs
     * @param int|null $nextOffset  Offset of the next batch; defaults to $offset + $batchSize
     */
    protected function scheduleNextBatchIfNeeded(int $offset, int $batchSize, array $ipAddresses, int $delay = 60, ?int $nextOffset = null): void
    {
        if (count($ipAddresses) !== $batchSize) {
            $this->releaseLock();
            return;
        }
        // Renew the lock so a run that lasts longer than the lock lifetime
        // keeps blocking a second, concurrent run.
        $this->lock();
        $timestamp = time() + max(0, $delay);
        $next = max(0, $nextOffset ?? ($offset + $batchSize));
        glsr(Queue::class)->once($timestamp, static::QUEUED_ACTION_KEY, ['offset' => $next], true);
    }
}
318
|
|
|
|