1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
 * This file contains all the screens that relate to search engines.
 *
 * @name      ElkArte Forum
 * @copyright ElkArte Forum contributors
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause
 *
 * This file contains code covered by:
 * copyright: 2011 Simple Machines (http://www.simplemachines.org)
 * license:   BSD, See included LICENSE.TXT for terms and conditions.
 *
 * @version 1.1
 *
 */
17
|
|
|
|
18
|
|
|
/**
 * Do we think the current user is a spider?
 *
 * What it does:
 *
 * - Drops any robot id previously stored in the session and stamps
 *   $_SESSION['robot_check'] with the current time.
 * - Loads the known spiders, longest user agent first (cached for five minutes).
 * - Tries to identify the visitor by user agent first, then by IP range.
 * - When $modSettings['spider_mode'] is 1 and a spider was found, logs the
 *   hit right away through logSpider().
 *
 * @package SearchEngines
 * @return int|false the id of the matching spider, 0 when nothing matched,
 *                   false when there are no spiders defined at all
 */
function spiderCheck()
{
	global $modSettings;

	// Start from a clean slate and remember when we last checked.
	if (isset($_SESSION['id_robot']))
		unset($_SESSION['id_robot']);

	$_SESSION['robot_check'] = time();

	// We cache the spider data for five minutes if we can.
	$spider_data = array();
	$cache = Cache::instance();
	if (!$cache->getVar($spider_data, 'spider_search', 300))
	{
		$db = database();

		// Longest user agents first, so the most specific signature wins.
		$request = $db->query('', '
			SELECT id_spider, user_agent, ip_info
			FROM {db_prefix}spiders
			ORDER BY LENGTH(user_agent) DESC',
			array(
			)
		);
		while ($row = $db->fetch_assoc($request))
			$spider_data[] = $row;
		$db->free_result($request);

		// Save it in the cache
		$cache->put('spider_search', $spider_data, 300);
	}

	if (empty($spider_data))
		return false;

	// We need the user agent: lower case it once, not once per spider.
	$req = request();
	$ci_user_agent = strtolower((string) $req->user_agent());

	// Without a remote address only the user agent can be matched.
	$remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
	$is_ipv6 = strpos($remote_addr, ':') !== false;

	// Always attempt IPv6 first.
	$ip_parts = array();
	if ($is_ipv6)
		$ip_parts = convertIPv6toInts($remote_addr);
	// Then xxx.xxx.xxx.xxx next
	else
		preg_match('/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/', $remote_addr, $ip_parts);

	// Key of the last address group in a range: 8 groups for IPv6, 4 for IPv4.
	$last_key = $is_ipv6 ? 7 : 3;

	foreach ($spider_data as $spider)
	{
		// User agent is easy.
		if (!empty($spider['user_agent']) && strpos($ci_user_agent, strtolower($spider['user_agent'])) !== false)
			$_SESSION['id_robot'] = $spider['id_spider'];
		// IP stuff is harder.
		elseif (!empty($ip_parts))
		{
			$ips = explode(',', $spider['ip_info']);
			foreach ($ips as $ip)
			{
				$ip = ip2range($ip);
				if (empty($ip))
					continue;

				// NOTE(review): the address is read at $key + 1, which fits the preg_match()
				// groups used for IPv4 (they start at index 1) - confirm that
				// convertIPv6toInts() returns its groups with the same offset.
				foreach ($ip as $key => $value)
				{
					// Outside of this group's range? Then this range is not the one.
					if ($value['low'] > $ip_parts[$key + 1] || $value['high'] < $ip_parts[$key + 1])
						break;
					// Made it through the last group, every part of the address fits.
					elseif ($key == $last_key)
						$_SESSION['id_robot'] = $spider['id_spider'];
				}

				// One matching range is all it takes.
				if (isset($_SESSION['id_robot']))
					break;
			}
		}

		if (isset($_SESSION['id_robot']))
			break;
	}

	// If this is low server tracking then log the spider here as opposed to the main logging function.
	if (!empty($modSettings['spider_mode']) && $modSettings['spider_mode'] == 1 && !empty($_SESSION['id_robot']))
		logSpider();

	return !empty($_SESSION['id_robot']) ? $_SESSION['id_robot'] : 0;
}
103
|
|
|
|
104
|
|
|
/**
 * Log the spider presence online.
 *
 * What it does:
 *
 * - Does nothing unless spider tracking is enabled and the session holds a robot id.
 * - With spider_mode 1 it increments today's row in log_spider_stats,
 *   inserting that row when the update touched nothing.
 * - With any other mode it adds a row to log_spider_hits; the serialized
 *   request parameters (minus the session variables) are stored with it only
 *   when spider_mode is above 2.
 *
 * @package SearchEngines
 */
function logSpider()
{
	global $modSettings, $context;

	if (empty($modSettings['spider_mode']) || empty($_SESSION['id_robot']))
		return;

	$db = database();

	// Attempt to update today's entry.
	if ($modSettings['spider_mode'] == 1)
	{
		// date() gives the same Y-m-d string strftime('%Y-%m-%d') did, without relying on a deprecated function.
		$date = date('Y-m-d', forum_time(false));
		$db->query('', '
			UPDATE {db_prefix}log_spider_stats
			SET last_seen = {int:current_time}, page_hits = page_hits + 1
			WHERE id_spider = {int:current_spider}
				AND stat_date = {date:current_date}',
			array(
				'current_date' => $date,
				'current_time' => time(),
				'current_spider' => $_SESSION['id_robot'],
			)
		);

		// Nothing updated?
		if ($db->affected_rows() == 0)
		{
			$db->insert('ignore',
				'{db_prefix}log_spider_stats',
				array(
					'id_spider' => 'int', 'last_seen' => 'int', 'stat_date' => 'date', 'page_hits' => 'int',
				),
				array(
					$_SESSION['id_robot'], time(), $date, 1,
				),
				array('id_spider', 'stat_date')
			);
		}

		return;
	}

	// If we're tracking better stats then track better stats - we sort out the today thing later.
	$url = '';
	if ($modSettings['spider_mode'] > 2)
	{
		// Keep the request parameters, but never the session identifiers.
		$url = $_GET;
		unset($url['sesc']);
		if (isset($context['session_var']))
			unset($url[$context['session_var']]);

		$url = serialize($url);
	}

	$db->insert('insert',
		'{db_prefix}log_spider_hits',
		array('id_spider' => 'int', 'log_time' => 'int', 'url' => 'string'),
		array($_SESSION['id_robot'], time(), $url),
		array()
	);
}
171
|
|
|
|
172
|
|
|
/**
 * This function takes any unprocessed hits and updates stats accordingly.
 *
 * What it does:
 *
 * - Counts the unprocessed rows of log_spider_hits per spider.
 * - Adds those counts to the matching log_spider_stats rows, inserting the
 *   rows that do not exist yet.
 * - Flags every unprocessed hit as processed.
 *
 * @package SearchEngines
 */
function consolidateSpiderStats()
{
	$db = database();

	// NOTE(review): log_time is filled from time() in logSpider(), i.e. a unix timestamp, while
	// MONTH() / DAYOFMONTH() work on date values. Unless the database layer rewrites the
	// 'consolidate_spider_stats' query this may not split the hits per day - confirm.
	$request = $db->query('consolidate_spider_stats', '
		SELECT id_spider, MAX(log_time) AS last_seen, COUNT(*) AS num_hits
		FROM {db_prefix}log_spider_hits
		WHERE processed = {int:not_processed}
		GROUP BY id_spider, MONTH(log_time), DAYOFMONTH(log_time)',
		array(
			'not_processed' => 0,
		)
	);
	$spider_hits = array();
	while ($row = $db->fetch_assoc($request))
		$spider_hits[] = $row;
	$db->free_result($request);

	if (empty($spider_hits))
		return;

	// Attempt to update the master data.
	$stat_inserts = array();
	foreach ($spider_hits as $stat)
	{
		// We assume the max date is within the right day.
		$date = date('Y-m-d', (int) $stat['last_seen']);

		// The hit count is bound like every other value, not spliced into the SQL.
		$db->query('', '
			UPDATE {db_prefix}log_spider_stats
			SET page_hits = page_hits + {int:num_hits},
				last_seen = CASE WHEN last_seen > {int:last_seen} THEN last_seen ELSE {int:last_seen} END
			WHERE id_spider = {int:current_spider}
				AND stat_date = {date:last_seen_date}',
			array(
				'last_seen_date' => $date,
				'last_seen' => $stat['last_seen'],
				'current_spider' => $stat['id_spider'],
				'num_hits' => $stat['num_hits'],
			)
		);

		// No row for that spider and day yet, queue it for insertion.
		if ($db->affected_rows() == 0)
			$stat_inserts[] = array($date, $stat['id_spider'], $stat['num_hits'], $stat['last_seen']);
	}

	// New stats?
	if (!empty($stat_inserts))
	{
		$db->insert('ignore',
			'{db_prefix}log_spider_stats',
			array('stat_date' => 'date', 'id_spider' => 'int', 'page_hits' => 'int', 'last_seen' => 'int'),
			$stat_inserts,
			array('stat_date', 'id_spider')
		);
	}

	// All processed.
	// NOTE(review): hits logged between the SELECT above and this UPDATE are flagged
	// as processed without having been counted - confirm whether that matters.
	$db->query('', '
		UPDATE {db_prefix}log_spider_hits
		SET processed = {int:is_processed}
		WHERE processed = {int:not_processed}',
		array(
			'is_processed' => 1,
			'not_processed' => 0,
		)
	);
}
240
|
|
|
|
241
|
|
|
/**
 * Rebuild the stored list of spider names.
 *
 * - Reads the id and name of every spider and saves the resulting
 *   id => name map, serialized, in the spider_name_cache setting.
 *
 * @package SearchEngines
 */
function recacheSpiderNames()
{
	$db = database();
	$names_by_id = array();

	$result = $db->query('', '
		SELECT id_spider, spider_name
		FROM {db_prefix}spiders',
		array()
	);
	while ($spider = $db->fetch_assoc($result))
	{
		$names_by_id[$spider['id_spider']] = $spider['spider_name'];
	}
	$db->free_result($result);

	// Settings hold strings, so the map goes in serialized.
	$serialized = serialize($names_by_id);
	updateSettings(array('spider_name_cache' => $serialized));
}
263
|
|
|
|
264
|
|
|
/**
 * Physically reorder the spiders table, longest user agent first, so that
 * an engine is not mistaken for another one with a shorter signature.
 *
 * @package SearchEngines
 * @deprecated since 1.0 - the ordering is done in the query, probably not needed
 */
function sortSpiderTable()
{
	$statement = '
		ALTER TABLE {db_prefix}spiders
		ORDER BY LENGTH(user_agent) DESC';

	$db = database();

	// Tell the database layer to skip the next error, then sort by user_agent length.
	$db->skip_next_error();
	$db->query('alter_table', $statement, array());
}
282
|
|
|
|
283
|
|
|
/**
 * Fetch one page of the known spiders.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @param int $start The item to start with (for pagination purposes)
 * @param int $items_per_page The number of items to show per page
 * @param string $sort A string indicating how to sort the results
 * @return array the spider rows (id_spider, spider_name, user_agent, ip_info), indexed by id_spider
 */
function getSpiders($start, $items_per_page, $sort)
{
	$db = database();
	$list = array();

	// The sort clause goes in raw, the paging values as integers.
	$parameters = array(
		'sort' => $sort,
		'start' => $start,
		'limit' => $items_per_page,
	);
	$result = $db->query('', '
		SELECT id_spider, spider_name, user_agent, ip_info
		FROM {db_prefix}spiders
		ORDER BY {raw:sort}
		LIMIT {int:start}, {int:limit}',
		$parameters
	);
	while ($spider = $db->fetch_assoc($result))
	{
		$list[$spider['id_spider']] = $spider;
	}
	$db->free_result($result);

	return $list;
}
314
|
|
|
|
315
|
|
|
/**
 * Look up a single spider by its id.
 *
 * @package SearchEngines
 * @param int $spider_id id of a spider
 * @return mixed the row as returned by fetch_assoc(), with the keys id, name,
 *               agent and ip_info when the spider exists
 */
function getSpiderDetails($spider_id)
{
	$db = database();

	$result = $db->query('', '
		SELECT id_spider as id, spider_name as name, user_agent as agent, ip_info
		FROM {db_prefix}spiders
		WHERE id_spider = {int:current_spider}',
		array('current_spider' => $spider_id)
	);

	// At most one row is of interest, take it and release the result.
	$details = $db->fetch_assoc($result);
	$db->free_result($result);

	return $details;
}
339
|
|
|
|
340
|
|
|
/**
 * Return the registered spiders count.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @return int The number of rows in the spiders table
 */
function getNumSpiders()
{
	$db = database();

	$request = $db->query('', '
		SELECT COUNT(*) AS num_spiders
		FROM {db_prefix}spiders',
		array(
		)
	);
	list ($numSpiders) = $db->fetch_row($request);
	$db->free_result($request);

	// Hand back a real integer, as documented, whatever type the driver returned.
	return (int) $numSpiders;
}
362
|
|
|
|
363
|
|
|
/**
 * Retrieve spider logs within the specified limits.
 *
 * - (used by createList() callbacks)
 *
 * @package SearchEngines
 * @param int $start The item to start with (for pagination purposes)
 * @param int $items_per_page The number of items to show per page
 * @param string $sort A string indicating how to sort the results
 * @return array An array of spider hits
 */
function getSpiderLogs($start, $items_per_page, $sort)
{
	$db = database();

	// Sorting and paging are bound as placeholders, the same way getSpiders() does it,
	// instead of being concatenated into the statement.
	$request = $db->query('', '
		SELECT sl.id_spider, sl.url, sl.log_time, s.spider_name
		FROM {db_prefix}log_spider_hits AS sl
			INNER JOIN {db_prefix}spiders AS s ON (s.id_spider = sl.id_spider)
		ORDER BY {raw:sort}
		LIMIT {int:start}, {int:limit}',
		array(
			'sort' => $sort,
			'start' => $start,
			'limit' => $items_per_page,
		)
	);
	$spider_logs = array();
	while ($row = $db->fetch_assoc($request))
		$spider_logs[] = $row;
	$db->free_result($request);

	return $spider_logs;
}
394
|
|
|
|
395
|
|
|
/**
 * Returns the count of spider logs.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @return int The number of rows in the log_spider_hits table
 */
function getNumSpiderLogs()
{
	$db = database();

	$request = $db->query('', '
		SELECT COUNT(*) AS num_logs
		FROM {db_prefix}log_spider_hits',
		array(
		)
	);
	list ($numLogs) = $db->fetch_row($request);
	$db->free_result($request);

	// Hand back a real integer, as documented, whatever type the driver returned.
	return (int) $numLogs;
}
417
|
|
|
|
418
|
|
|
/**
 * Get a list of spider stats from the log_spider_stats table within the
 * specified limits.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @param int $start The item to start with (for pagination purposes)
 * @param int $items_per_page The number of items to show per page
 * @param string $sort A string indicating how to sort the results
 * @return array An array of stat rows (id_spider, stat_date, page_hits, spider_name)
 */
function getSpiderStats($start, $items_per_page, $sort)
{
	$db = database();

	// Sorting and paging are bound as placeholders, the same way getSpiders() does it,
	// instead of being concatenated into the statement.
	$request = $db->query('', '
		SELECT ss.id_spider, ss.stat_date, ss.page_hits, s.spider_name
		FROM {db_prefix}log_spider_stats AS ss
			INNER JOIN {db_prefix}spiders AS s ON (s.id_spider = ss.id_spider)
		ORDER BY {raw:sort}
		LIMIT {int:start}, {int:limit}',
		array(
			'sort' => $sort,
			'start' => $start,
			'limit' => $items_per_page,
		)
	);
	$spider_stats = array();
	while ($row = $db->fetch_assoc($request))
		$spider_stats[] = $row;
	$db->free_result($request);

	return $spider_stats;
}
448
|
|
|
|
449
|
|
|
/**
 * Get the number of spider stat rows from the log spider stats table
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @param int|null $time (optional) if specified counts only the entries before that date.
 *                       NOTE(review): the value is bound to a {date:} placeholder; elsewhere in
 *                       this file such placeholders receive 'Y-m-d' strings - confirm what the callers pass.
 * @return int The number of rows in the log_spider_stats table
 */
function getNumSpiderStats($time = null)
{
	$db = database();

	// Only add the date filter, and its parameter, when a date was actually given.
	$where = '';
	$parameters = array();
	if ($time !== null)
	{
		$where = '
		WHERE stat_date < {date:date_being_viewed}';
		$parameters['date_being_viewed'] = $time;
	}

	$request = $db->query('', '
		SELECT COUNT(*)
		FROM {db_prefix}log_spider_stats' . $where,
		$parameters
	);
	list ($numStats) = $db->fetch_row($request);
	$db->free_result($request);

	// Hand back a real integer, as documented, whatever type the driver returned.
	return (int) $numStats;
}
474
|
|
|
|
475
|
|
|
/**
 * Delete the logged spider hits that are older than the given time.
 *
 * @package SearchEngines
 * @param int $time a time value; rows of log_spider_hits with a smaller log_time are removed
 */
function removeSpiderOldLogs($time)
{
	$parameters = array('delete_period' => $time);

	// Delete the entries.
	database()->query('', '
		DELETE FROM {db_prefix}log_spider_hits
		WHERE log_time < {int:delete_period}',
		$parameters
	);
}
494
|
|
|
|
495
|
|
|
/**
 * Delete the spider stats whose last visit is older than the given time.
 *
 * @package SearchEngines
 * @param int $time a time value; rows of log_spider_stats with a smaller last_seen are removed
 */
function removeSpiderOldStats($time)
{
	$parameters = array('delete_period' => $time);

	// Delete the entries.
	database()->query('', '
		DELETE FROM {db_prefix}log_spider_stats
		WHERE last_seen < {int:delete_period}',
		$parameters
	);
}
514
|
|
|
|
515
|
|
|
/**
 * Remove all the entries connected to a certain spider (description, entries, stats)
 *
 * - Deletes the given ids from the spiders, log_spider_hits and
 *   log_spider_stats tables.
 * - Does nothing when the list of ids is empty.
 *
 * @package SearchEngines
 * @param int[] $spiders_id an array of spider ids
 */
function removeSpiders($spiders_id)
{
	// Nothing to remove, and an empty list cannot make a valid IN () clause anyway.
	if (empty($spiders_id))
		return;

	$db = database();

	// The spiders themselves, then their hits, then their stats.
	$tables = array('spiders', 'log_spider_hits', 'log_spider_stats');
	foreach ($tables as $table)
	{
		$db->query('', '
			DELETE FROM {db_prefix}' . $table . '
			WHERE id_spider IN ({array_int:remove_list})',
			array(
				'remove_list' => $spiders_id,
			)
		);
	}
}
547
|
|
|
|
548
|
|
|
/**
 * Find out when each spider was last seen around.
 *
 * @package SearchEngines
 * @return array the highest last_seen value of log_spider_stats per spider, indexed by id_spider
 */
function spidersLastSeen()
{
	$db = database();
	$seen_by_spider = array();

	$result = $db->query('', '
		SELECT id_spider, MAX(last_seen) AS last_seen_time
		FROM {db_prefix}log_spider_stats
		GROUP BY id_spider',
		array()
	);
	while ($stat = $db->fetch_assoc($result))
	{
		$seen_by_spider[$stat['id_spider']] = $stat['last_seen_time'];
	}
	$db->free_result($result);

	return $seen_by_spider;
}
572
|
|
|
|
573
|
|
|
/**
 * Returns an array of dates ranging from the first appearance of a spider and the last
 *
 * - One entry per month between the earliest and the latest stat_date of
 *   log_spider_stats, both included.
 * - Keys are the year immediately followed by the month number without
 *   leading zero (e.g. 20113 for March 2011), values the short month name
 *   from $txt['months_short'] followed by the year.
 *
 * @package SearchEngines
 * @return array the month choices, empty when no stats have been logged
 */
function spidersStatsDates()
{
	global $txt;

	$db = database();

	// Get the earliest and latest dates.
	$request = $db->query('', '
		SELECT MIN(stat_date) AS first_date, MAX(stat_date) AS last_date
		FROM {db_prefix}log_spider_stats',
		array(
		)
	);

	list ($min_date, $max_date) = $db->fetch_row($request);
	$db->free_result($request);

	// No stats at all? Then there are no dates to choose from (and nothing to feed substr() with).
	if (empty($min_date) || empty($max_date))
		return array();

	// The dates are read as YYYY-MM-..., year in the first four characters, month after the dash.
	$min_year = (int) substr($min_date, 0, 4);
	$max_year = (int) substr($max_date, 0, 4);
	$min_month = (int) substr($min_date, 5, 2);
	$max_month = (int) substr($max_date, 5, 2);

	// Prepare the dates for the drop down.
	$date_choices = array();
	for ($y = $min_year; $y <= $max_year; $y++)
	{
		for ($m = 1; $m <= 12; $m++)
		{
			// This doesn't count?
			if ($y == $min_year && $m < $min_month)
				continue;

			// Past the last month with stats, we are done with this (final) year.
			if ($y == $max_year && $m > $max_month)
				break;

			$date_choices[$y . $m] = $txt['months_short'][$m] . ' ' . $y;
		}
	}

	return $date_choices;
}
616
|
|
|
|
617
|
|
|
/**
 * Save a spider: changes the existing entry when an id is given,
 * creates a new one otherwise.
 *
 * @package SearchEngines
 * @param int $id id of the spider to update, empty to add a new spider
 * @param string $name spider name
 * @param string $agent ua of the spider
 * @param string $info_ip stored as is in the ip_info column
 */
function updateSpider($id = 0, $name = '', $agent = '', $info_ip = '')
{
	$db = database();

	// Existing spider update
	if (!empty($id))
	{
		$db->query('', '
			UPDATE {db_prefix}spiders
			SET spider_name = {string:spider_name}, user_agent = {string:spider_agent},
				ip_info = {string:ip_info}
			WHERE id_spider = {int:current_spider}',
			array(
				'current_spider' => $id,
				'spider_name' => $name,
				'spider_agent' => $agent,
				'ip_info' => $info_ip,
			)
		);

		return;
	}

	// New spider, insert
	$columns = array(
		'spider_name' => 'string', 'user_agent' => 'string', 'ip_info' => 'string',
	);
	$values = array(
		$name, $agent, $info_ip,
	);
	$db->insert('insert', '{db_prefix}spiders', $columns, $values, array('id_spider'));
}
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.