Test Failed
Push — develop ( ad6b70...42909d )
by Paul
09:48
created

GeolocateReviews::processReview()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 21
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 21
rs 9.4888
c 0
b 0
f 0
cc 5
nc 5
nop 1
1
<?php
2
3
namespace GeminiLabs\SiteReviews\Commands;
4
5
use GeminiLabs\SiteReviews\Database;
6
use GeminiLabs\SiteReviews\Database\Query;
7
use GeminiLabs\SiteReviews\Defaults\StatDefaults;
8
use GeminiLabs\SiteReviews\Geolocation;
9
use GeminiLabs\SiteReviews\Helper;
10
use GeminiLabs\SiteReviews\Modules\Notice;
11
use GeminiLabs\SiteReviews\Modules\Queue;
12
use GeminiLabs\SiteReviews\Response;
13
use GeminiLabs\SiteReviews\Review;
14
15
class GeolocateReviews extends AbstractCommand
16
{
17
    /**
18
     * IP-API batch requests allow a maximum of 100 IPs per request.
     * Also used as the number of IP addresses fetched per processing batch.
19
     */
20
    public const BATCH_SIZE = 100;
21
22
    /**
23
     * Maximum number of rows buffered before a bulk insert query is issued.
24
     */
25
    public const INSERT_CHUNK_SIZE = 500;
26
27
    /**
28
     * Name of the transient used as the processing lock.
29
     */
30
    public const LOCK_KEY = 'glsr_geolocation_processing_lock';
31
32
    /**
33
     * Action name passed to the Queue when scheduling batches and when
     * checking whether a batch is still pending.
34
     */
35
    public const QUEUED_ACTION_KEY = 'queue/geolocations';
36
37
    /**
38
     * Number of rating rows fetched per database query by the results generator.
39
     */
40
    public const ROW_BATCH_SIZE = 500;
41
42
    /**
     * Run the command: queue geolocation processing and ask for the outcome
     * to be reported through a notice.
     */
    public function handle(): void
    {
        $notify = true;
        $this->queue($notify);
    }
46
47
    /**
     * Process a batch of IPs for geolocation data.
     *
     * Fetches up to BATCH_SIZE IPs, retrieves their geolocation data, inserts
     * stats, updates post meta, and then either schedules the next batch or
     * releases the processing lock.
     *
     * The lock is released on every path that does not schedule a follow-up
     * action, so an empty or failed batch never leaves the lock held.
     *
     * NOTE(review): rows that receive stats presumably stop matching the
     * "needs geolocation" query, so advancing the offset by a full batch may
     * skip IPs until the next run; confirm against the intended pagination.
     *
     * @param int $offset Offset for IP query (negative values are treated as 0)
     */
    public function process(int $offset = 0): void
    {
        $offset = max(0, $offset);
        $ipAddresses = $this->fetchIpsNeedingGeolocation($offset);
        if (empty($ipAddresses)) {
            // Nothing left to process and nothing will be scheduled.
            $this->releaseLock();
            return;
        }
        $response = $this->fetchRemoteGeolocationData($ipAddresses);
        $results = $response->body();
        if (empty($results)) {
            glsr_log()->warning("Geolocation: No geolocation data retrieved at offset {$offset}");
            $this->releaseLock(); // Processing stops here, so the lock must not linger
            return;
        }
        $validResults = $this->filterValidGeolocationResults($results);
        if (empty($validResults)) {
            glsr_log()->warning("Geolocation: No valid geolocation results at offset {$offset}");
            $this->releaseLock(); // Processing stops here, so the lock must not linger
            return;
        }
        $this->processResults($validResults);
        $this->scheduleNextBatchIfNeeded($offset, static::BATCH_SIZE, $ipAddresses);
    }
75
76
    /**
     * Geolocate a single review and store the result.
     *
     * Nothing is stored when the review is invalid, its IP address is local,
     * the lookup fails, or the lookup did not return a successful result.
     * Otherwise a row is inserted into the stats table and the same data
     * (without the rating ID) is saved as the review's "_geolocation" meta.
     */
    public function processReview(Review $review): void
    {
        if (!$review->isValid() || Helper::isLocalIpAddress($review->ip_address)) {
            return;
        }
        $lookup = glsr(Geolocation::class)->lookup($review->ip_address);
        if ($lookup->failed()) {
            return;
        }
        $valid = $this->filterValidGeolocationResults([$lookup->body()]);
        $location = $valid[0] ?? null;
        if (empty($location)) {
            return;
        }
        $values = wp_parse_args(['rating_id' => $review->rating_id], $location);
        $stat = glsr(StatDefaults::class)->restrict($values);
        glsr(Database::class)->insert('stats', $stat);
        // The rating ID belongs to the stats row only, not to the post meta.
        $meta = array_diff_key($stat, ['rating_id' => 0]);
        update_post_meta($review->ID, '_geolocation', $meta);
    }
98
99
    /**
     * Start processing via WP-Cron.
     *
     * @param bool $notify Whether to add a notice describing the outcome
     *
     * @return bool True when a new processing run was queued, false when
     *              processing is already locked or no IPs need geolocation
     */
    public function queue(bool $notify = false): bool
    {
        $isPending = glsr(Queue::class)->isPending(static::QUEUED_ACTION_KEY);
        if (!$isPending) {
            // No action is pending, so drop any lock that is still set.
            $this->releaseLock();
        }
        $isLocked = (bool) get_transient(static::LOCK_KEY);
        if ($isLocked) { // Prevent concurrent processing
            if ($notify) {
                $message = _x('Geolocation processing is already in progress.', 'admin-text', 'site-reviews');
                glsr(Notice::class)->addWarning($message);
            }
            return false;
        }
        $ipsToProcess = $this->countIpsNeedingGeolocation();
        if (0 === $ipsToProcess) {
            if ($notify) {
                $message = _x('All valid IP addresses have already been geolocated.', 'admin-text', 'site-reviews');
                glsr(Notice::class)->addInfo($message);
            }
            return false;
        }
        $this->lock();
        glsr(Queue::class)->once(time(), static::QUEUED_ACTION_KEY, ['offset' => 0], true);
        if ($notify) {
            $template = _x('Successfully queued geolocation processing of %d IP addresses.', 'admin-text', 'site-reviews');
            glsr(Notice::class)->addSuccess(sprintf($template, $ipsToProcess));
        }
        return true;
    }
133
134
    /**
     * Build the command response.
     *
     * @return array Array with a single "notices" key holding the collected notices
     */
    public function response(): array
    {
        $notices = glsr(Notice::class)->get();

        return ['notices' => $notices];
    }
140
141
    /**
     * Count the distinct IP addresses that still need geolocation.
     *
     * Uses the same filters as fetchIpsNeedingGeolocation(): the rating has a
     * usable IP address and no matching row in the stats table. Distinct
     * addresses are counted (not rating rows) because the result is reported
     * to the user as a number of IP addresses.
     *
     * @return int Number of distinct IP addresses without geolocation stats
     */
    protected function countIpsNeedingGeolocation(): int
    {
        $sql = "
            SELECT COUNT(DISTINCT r.ip_address)
            FROM table|ratings AS r
            LEFT JOIN table|stats AS s ON (r.ID = s.rating_id)
            WHERE 1=1
            AND r.ip_address IS NOT NULL
            AND r.ip_address != ''
            AND r.ip_address != '127.0.0.1'
            AND r.ip_address != 'unknown'
            AND s.rating_id IS NULL
        ";
        $query = glsr(Query::class)->sql($sql);
        return (int) glsr(Database::class)->dbGetVar($query);
    }
157
158
    /**
     * Fetch one page of distinct IP addresses that still need geolocation.
     *
     * An IP address qualifies when a rating has it and that rating has no
     * matching row in the stats table. Results are ordered by IP address so
     * that LIMIT/OFFSET pagination is deterministic between calls.
     *
     * @param int $offset Offset for pagination
     *
     * @return array Up to BATCH_SIZE IP addresses
     */
    protected function fetchIpsNeedingGeolocation(int $offset): array
    {
        $sql = "
            SELECT DISTINCT r.ip_address
            FROM table|ratings AS r
            LEFT JOIN table|stats AS s ON (r.ID = s.rating_id)
            WHERE 1=1
            AND r.ip_address IS NOT NULL
            AND r.ip_address != ''
            AND r.ip_address != '127.0.0.1'
            AND r.ip_address != 'unknown'
            AND s.rating_id IS NULL
            ORDER BY r.ip_address
            LIMIT %d OFFSET %d
        ";
        $query = glsr(Query::class)->sql($sql, static::BATCH_SIZE, $offset);
        return glsr(Database::class)->dbGetCol($query);
    }
178
179
    /**
     * Request geolocation data for a batch of IP addresses and honour the
     * rate limit reported by the API.
     *
     * When the "x-rl" header reports zero remaining requests, or the response
     * code is 429, this method sleeps for the number of seconds given by the
     * "x-ttl" header (at least 60) before returning. A response without an
     * "x-rl" header is not treated as rate limited.
     *
     * @param string[] $ipAddresses IPs to fetch data for
     *
     * @return Response The batch lookup response, returned as received
     */
    protected function fetchRemoteGeolocationData(array $ipAddresses): Response
    {
        $response = glsr(Geolocation::class)->batchLookup($ipAddresses);
        $remainingRequests = $response->headers['x-rl'] ?? null;
        $resetTime = max((int) ($response->headers['x-ttl'] ?? 0), 60); // Min 60 seconds
        // Only a header that is present and numeric can signal an exhausted limit;
        // a missing header must not be mistaken for "0 requests remaining".
        $isLimitExhausted = is_numeric($remainingRequests) && 0 === (int) $remainingRequests;
        if ($isLimitExhausted) {
            glsr_log()->warning("Geolocation: Rate limit reached, waiting {$resetTime} seconds");
            sleep($resetTime);
        } elseif (422 === $response->code) {
            glsr_log()->error('Geolocation: 422 Unprocessable Entity, invalid batch request');
        } elseif (429 === $response->code) {
            glsr_log()->warning("Geolocation: 429 Too Many Requests, waiting {$resetTime} seconds");
            sleep($resetTime);
        }
        return $response;
    }
201
202
    /**
     * Keep only the results whose status is "success" and whose "query"
     * value is not empty. Array keys of the input are preserved.
     *
     * @param array $results Geolocation API results
     *
     * @return array The successful results, keyed as in the input
     */
    protected function filterValidGeolocationResults(array $results): array
    {
        $isValid = static fn ($result): bool => 'success' === ($result['status'] ?? '')
            && !empty($result['query']);

        return array_filter($results, $isValid);
    }
213
214
    /**
     * Set the processing lock transient.
     *
     * @param int $duration Expiration passed to set_transient()
     */
    protected function lock(int $duration = \HOUR_IN_SECONDS): void
    {
        $isLocked = true;
        set_transient(static::LOCK_KEY, $isLocked, $duration);
    }
218
219
    /**
     * Merge each rating row with the geolocation result for its IP address and
     * bulk insert the outcome into the stats and postmeta tables.
     *
     * Rows are buffered and written in chunks of INSERT_CHUNK_SIZE; whatever
     * remains after the generator is exhausted is written at the end.
     *
     * @param \Generator $generator Generator yielding ratings data
     * @param array      $results   Valid geolocation results
     */
    protected function prepareAndInsert(\Generator $generator, array $results): void
    {
        $postmetaCol = ['post_id', 'meta_key', 'meta_value'];
        $statsCol = array_keys(glsr(StatDefaults::class)->defaults());
        // Index the results by IP address once instead of scanning every result
        // for every row. The first result for an IP address wins.
        $resultsByIp = [];
        foreach ($results as $geolocation) {
            $ipAddress = $geolocation['query'] ?? null;
            if (!is_string($ipAddress) && !is_int($ipAddress)) {
                continue;
            }
            $resultsByIp[$ipAddress] ??= $geolocation;
        }
        // Writes one chunk to both tables; does nothing for an empty chunk.
        $insert = static function (array $stats, array $meta) use ($statsCol, $postmetaCol): void {
            if (empty($stats) || empty($meta)) {
                return;
            }
            glsr(Database::class)->insertBulk('stats', $stats, $statsCol);
            glsr(Database::class)->insertBulk('postmeta', $meta, $postmetaCol);
        };
        $data = [];
        $postmeta = [];
        foreach ($generator as $item) {
            $result = glsr(StatDefaults::class)->restrict(
                wp_parse_args($item, $resultsByIp[$item['ip_address']] ?? [])
            );
            $data[] = $result;
            $postmeta[] = [
                'post_id' => $item['review_id'],
                'meta_key' => '_geolocation',
                // The rating ID belongs to the stats row only, not to the post meta.
                'meta_value' => maybe_serialize(array_diff_key($result, ['rating_id' => 0])),
            ];
            if (count($data) >= static::INSERT_CHUNK_SIZE) {
                $insert($data, $postmeta);
                $data = [];
                $postmeta = [];
            }
        }
        $insert($data, $postmeta);
    }
254
255
    /**
     * Store the given geolocation results for every rating that uses one of
     * the IP addresses found in the results' "query" values.
     *
     * @param array $results Valid geolocation results
     */
    protected function processResults(array $results): void
    {
        $ratings = $this->resultsGenerator(wp_list_pluck($results, 'query'));
        $this->prepareAndInsert($ratings, $results);
    }
261
262
    /**
     * Release the processing lock by deleting its transient.
     */
    protected function releaseLock(): void
    {
        $lockKey = static::LOCK_KEY;
        delete_transient($lockKey);
    }
269
270
    /**
     * Generator to yield ratings data for a list of IP addresses.
     *
     * The IP addresses are queried in chunks of BATCH_SIZE, and each chunk is
     * read in pages of ROW_BATCH_SIZE rows to keep memory usage bounded. Rows
     * are ordered by ID so that LIMIT/OFFSET paging cannot repeat or skip rows
     * between pages. Paging for a chunk stops at the first page that is not full.
     *
     * @param string[] $ipAddresses List of IPs to query
     *
     * @return \Generator Yields rows with ip_address, rating_id and review_id keys
     */
    protected function resultsGenerator(array $ipAddresses): \Generator
    {
        foreach (array_chunk($ipAddresses, static::BATCH_SIZE) as $chunk) {
            $offset = 0;
            $placeholders = implode(',', array_fill(0, count($chunk), '%s'));
            $sql = "
                SELECT ip_address, ID AS rating_id, review_id
                FROM table|ratings
                WHERE ip_address IN ($placeholders)
                ORDER BY ID
                LIMIT %d OFFSET %d
            ";
            do {
                $query = glsr(Query::class)->sql($sql, array_merge($chunk, [static::ROW_BATCH_SIZE, $offset]));
                $rows = (array) glsr(Database::class)->dbGetResults($query, \ARRAY_A);
                $rowCount = count($rows);
                foreach ($rows as $row) {
                    yield $row;
                }
                $offset += static::ROW_BATCH_SIZE;
                unset($rows); // Free memory
                // A short page means there are no more rows for this chunk.
            } while ($rowCount >= static::ROW_BATCH_SIZE);
        }
    }
300
301
    /**
     * Queue the following batch when the current one was full; otherwise
     * release the processing lock.
     *
     * @param int   $offset      Current offset
     * @param int   $batchSize   Size of the current batch
     * @param array $ipAddresses Current batch of IPs
     * @param int   $delay       Seconds to wait before the next batch runs (negative values count as 0)
     */
    protected function scheduleNextBatchIfNeeded(int $offset, int $batchSize, array $ipAddresses, int $delay = 60): void
    {
        if (count($ipAddresses) !== $batchSize) {
            // The batch was not full, so nothing further is scheduled.
            $this->releaseLock();
            return;
        }
        $runAt = time() + max(0, $delay);
        $args = ['offset' => $offset + $batchSize];
        glsr(Queue::class)->once($runAt, static::QUEUED_ACTION_KEY, $args, true);
    }
317
}
318