Completed
Pull Request — patch_1-1-4 (#3202)
by Spuds
15:49
created

SearchEngines.subs.php ➔ removeSpiderOldLogs()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 13
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 5
nc 1
nop 1
dl 0
loc 13
ccs 6
cts 6
cp 1
crap 1
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * This file contains all the screens that relate to search engines.
5
 *
6
 * @name      ElkArte Forum
7
 * @copyright ElkArte Forum contributors
8
 * @license   BSD http://opensource.org/licenses/BSD-3-Clause
9
 *
10
 * This file contains code covered by:
11
 * copyright:	2011 Simple Machines (http://www.simplemachines.org)
12
 * license:  	BSD, See included LICENSE.TXT for terms and conditions.
13
 *
14
 * @version 1.1
15
 *
16
 */
17
18
/**
 * Do we think the current user is a spider?
 *
 * - Drops any previous $_SESSION['id_robot'] and records the check time in $_SESSION['robot_check'].
 * - Loads the known spiders, longest user agent first, caching the list for five minutes.
 * - Tries a case-insensitive user agent substring match first, then the IP ranges listed in ip_info.
 * - With spider_mode set to 1 the hit is logged right here through logSpider().
 *
 * @package SearchEngines
 * @return int|false id of the matching spider, 0 when nothing matched, false when no spiders are defined
 */
function spiderCheck()
{
	global $modSettings;

	$db = database();

	if (isset($_SESSION['id_robot']))
		unset($_SESSION['id_robot']);

	$_SESSION['robot_check'] = time();

	// We cache the spider data for five minutes if we can.
	$spider_data = array();
	$cache = Cache::instance();
	if (!$cache->getVar($spider_data, 'spider_search', 300))
	{
		$request = $db->query('', '
			SELECT id_spider, user_agent, ip_info
			FROM {db_prefix}spiders
			ORDER BY LENGTH(user_agent) DESC',
			array(
			)
		);
		while ($row = $db->fetch_assoc($request))
			$spider_data[] = $row;
		$db->free_result($request);

		// Save it in the cache
		$cache->put('spider_search', $spider_data, 300);
	}

	if (empty($spider_data))
		return false;

	// We need the user agent, lower cased once instead of on every pass of the loop.
	$req = request();
	$user_agent = strtolower((string) $req->user_agent());

	// The address can be missing (nothing guarantees REMOTE_ADDR is set), treat that as "no IP to match".
	$remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
	$is_ipv6 = strpos($remote_addr, ':') !== false;

	// Always attempt IPv6 first.
	if ($is_ipv6)
		$ip_parts = convertIPv6toInts($remote_addr);
	// Then xxx.xxx.xxx.xxx next
	else
		preg_match('/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/', $remote_addr, $ip_parts);

	// A range only counts as matched once its last group passed: key 7 for IPv6, key 3 for IPv4.
	$last_key = $is_ipv6 ? 7 : 3;

	foreach ($spider_data as $spider)
	{
		// User agent is easy.
		if (!empty($spider['user_agent']) && strpos($user_agent, strtolower($spider['user_agent'])) !== false)
			$_SESSION['id_robot'] = $spider['id_spider'];
		// IP stuff is harder.
		elseif (!empty($ip_parts))
		{
			$ips = explode(',', $spider['ip_info']);
			foreach ($ips as $ip)
			{
				$ip = ip2range($ip);
				if (!empty($ip))
				{
					foreach ($ip as $key => $value)
					{
						// Index 0 of the preg_match() result is the whole address, so group N sits at N + 1.
						// NOTE(review): the same offset is applied to the convertIPv6toInts() result - confirm that helper's indexing.
						if ($value['low'] > $ip_parts[$key + 1] || $value['high'] < $ip_parts[$key + 1])
							break;
						elseif ($key == $last_key)
							$_SESSION['id_robot'] = $spider['id_spider'];
					}
				}
			}
		}

		// First match wins, the list is ordered so the most specific user agent is tried first.
		if (isset($_SESSION['id_robot']))
			break;
	}

	// If this is low server tracking then log the spider here as opposed to the main logging function.
	if (!empty($modSettings['spider_mode']) && $modSettings['spider_mode'] == 1 && !empty($_SESSION['id_robot']))
		logSpider();

	return !empty($_SESSION['id_robot']) ? $_SESSION['id_robot'] : 0;
}
103
104
/**
 * Log the spider presence online.
 *
 * - Does nothing unless spider tracking is enabled and $_SESSION['id_robot'] is set.
 * - spider_mode 1: bumps (or creates) today's row in log_spider_stats.
 * - any other mode: adds a row to log_spider_hits; the serialized $_GET (minus the
 *   session tokens) is stored as the url only when spider_mode is greater than 2.
 *
 * @package SearchEngines
 */
function logSpider()
{
	global $modSettings, $context;

	$db = database();

	if (empty($modSettings['spider_mode']) || empty($_SESSION['id_robot']))
		return;

	// Attempt to update today's entry.
	if ($modSettings['spider_mode'] == 1)
	{
		// date() gives the same Y-m-d string strftime('%Y-%m-%d') did, without the PHP 8.1 deprecation.
		$date = date('Y-m-d', forum_time(false));
		$db->query('', '
			UPDATE {db_prefix}log_spider_stats
			SET last_seen = {int:current_time}, page_hits = page_hits + 1
			WHERE id_spider = {int:current_spider}
				AND stat_date = {date:current_date}',
			array(
				'current_date' => $date,
				'current_time' => time(),
				'current_spider' => $_SESSION['id_robot'],
			)
		);
		// Nothing updated?
		if ($db->affected_rows() == 0)
		{
			$db->insert('ignore',
				'{db_prefix}log_spider_stats',
				array(
					'id_spider' => 'int', 'last_seen' => 'int', 'stat_date' => 'date', 'page_hits' => 'int',
				),
				array(
					$_SESSION['id_robot'], time(), $date, 1,
				),
				array('id_spider', 'stat_date')
			);
		}
	}
	// If we're tracking better stats than track, better stats - we sort out the today thing later.
	else
	{
		if ($modSettings['spider_mode'] > 2)
		{
			// Keep the request parameters, but never the session tokens.
			$url = $_GET;
			if (isset($context['session_var']))
				unset($url['sesc'], $url[$context['session_var']]);
			else
				unset($url['sesc']);
			$url = serialize($url);
		}
		else
			$url = '';

		$db->insert('insert',
			'{db_prefix}log_spider_hits',
			array('id_spider' => 'int', 'log_time' => 'int', 'url' => 'string'),
			array($_SESSION['id_robot'], time(), $url),
			array()
		);
	}
}
171
172
/**
 * This function takes any unprocessed hits and updates stats accordingly.
 *
 * - Groups the unprocessed rows of log_spider_hits per spider and day.
 * - Adds each group to the matching log_spider_stats row, or queues a new row when none exists.
 * - Finally flags every unprocessed hit as processed.
 *
 * @package SearchEngines
 */
function consolidateSpiderStats()
{
	$db = database();

	$request = $db->query('consolidate_spider_stats', '
		SELECT id_spider, MAX(log_time) AS last_seen, COUNT(*) AS num_hits
		FROM {db_prefix}log_spider_hits
		WHERE processed = {int:not_processed}
		GROUP BY id_spider, MONTH(log_time), DAYOFMONTH(log_time)',
		array(
			'not_processed' => 0,
		)
	);
	$spider_hits = array();
	while ($row = $db->fetch_assoc($request))
		$spider_hits[] = $row;
	$db->free_result($request);

	if (empty($spider_hits))
		return;

	// Attempt to update the master data.
	$stat_inserts = array();
	foreach ($spider_hits as $stat)
	{
		// We assume the max date is within the right day.
		// date() gives the same Y-m-d string strftime('%Y-%m-%d') did, without the PHP 8.1 deprecation.
		$date = date('Y-m-d', (int) $stat['last_seen']);

		// The hit count is bound as an integer rather than spliced into the statement.
		$db->query('', '
			UPDATE {db_prefix}log_spider_stats
			SET page_hits = page_hits + {int:num_hits},
				last_seen = CASE WHEN last_seen > {int:last_seen} THEN last_seen ELSE {int:last_seen} END
			WHERE id_spider = {int:current_spider}
				AND stat_date = {date:last_seen_date}',
			array(
				'last_seen_date' => $date,
				'last_seen' => $stat['last_seen'],
				'current_spider' => $stat['id_spider'],
				'num_hits' => $stat['num_hits'],
			)
		);

		// No row for that spider and day yet, so it has to be created below.
		if ($db->affected_rows() == 0)
			$stat_inserts[] = array($date, $stat['id_spider'], $stat['num_hits'], $stat['last_seen']);
	}

	// New stats?
	if (!empty($stat_inserts))
		$db->insert('ignore',
			'{db_prefix}log_spider_stats',
			array('stat_date' => 'date', 'id_spider' => 'int', 'page_hits' => 'int', 'last_seen' => 'int'),
			$stat_inserts,
			array('stat_date', 'id_spider')
		);

	// All processed.
	$db->query('', '
		UPDATE {db_prefix}log_spider_hits
		SET processed = {int:is_processed}
		WHERE processed = {int:not_processed}',
		array(
			'is_processed' => 1,
			'not_processed' => 0,
		)
	);
}
240
241
/**
 * Rebuild the cached list of spider names.
 *
 * Reads every row of the spiders table and stores the serialized
 * id => name map in the 'spider_name_cache' setting.
 *
 * @package SearchEngines
 */
function recacheSpiderNames()
{
	$db = database();

	$result = $db->query('', '
		SELECT id_spider, spider_name
		FROM {db_prefix}spiders',
		array()
	);

	// Key the names by spider id.
	$names_by_id = array();
	while ($spider = $db->fetch_assoc($result))
	{
		$names_by_id[$spider['id_spider']] = $spider['spider_name'];
	}
	$db->free_result($result);

	$new_settings = array(
		'spider_name_cache' => serialize($names_by_id),
	);
	updateSettings($new_settings);
}
263
264
/**
 * Physically re-order the spiders table by user agent length, longest first,
 * so the more specific agents are met before the generic ones.
 *
 * @package SearchEngines
 * @deprecated since 1.0 - the ordering is done in the query, probably not needed
 */
function sortSpiderTable()
{
	$db = database();

	// Order the table by user_agent length.
	$statement = '
		ALTER TABLE {db_prefix}spiders
		ORDER BY LENGTH(user_agent) DESC';

	// NOTE(review): skip_next_error() presumably keeps a failure of this statement quiet - confirm.
	$db->skip_next_error();
	$db->query('alter_table', $statement, array());
}
282
283
/**
 * Fetch one page of the registered spiders.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @param int $start The item to start with (for pagination purposes)
 * @param int $items_per_page  The number of items to show per page
 * @param string $sort A string indicating how to sort the results (inserted as-is in the ORDER BY)
 * @return array the spider rows (id_spider, spider_name, user_agent, ip_info) keyed by id_spider
 */
function getSpiders($start, $items_per_page, $sort)
{
	$db = database();

	$query_params = array(
		'sort' => $sort,
		'start' => $start,
		'limit' => $items_per_page,
	);

	$result = $db->query('', '
		SELECT id_spider, spider_name, user_agent, ip_info
		FROM {db_prefix}spiders
		ORDER BY {raw:sort}
		LIMIT {int:start}, {int:limit}',
		$query_params
	);

	// Index the rows by spider id.
	$found = array();
	while ($spider = $db->fetch_assoc($result))
	{
		$found[$spider['id_spider']] = $spider;
	}
	$db->free_result($result);

	return $found;
}
314
315
/**
 * Load the definition of a single spider.
 *
 * The row is returned exactly as fetched, with the columns aliased to
 * id, name, agent and ip_info.
 *
 * @package SearchEngines
 * @param int $spider_id id of a spider
 * @return mixed whatever fetch_assoc() gives back for the first row of the result
 */
function getSpiderDetails($spider_id)
{
	$db = database();

	$query_params = array(
		'current_spider' => $spider_id,
	);

	$result = $db->query('', '
		SELECT id_spider as id, spider_name as name, user_agent as agent, ip_info
		FROM {db_prefix}spiders
		WHERE id_spider = {int:current_spider}',
		$query_params
	);

	// Only the first row is of interest.
	$details = $db->fetch_assoc($result);
	$db->free_result($result);

	return $details;
}
339
340
/**
 * Return the registered spiders count.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @return int The number of rows in the spiders table
 */
function getNumSpiders()
{
	$db = database();

	$request = $db->query('', '
		SELECT COUNT(*) AS num_spiders
		FROM {db_prefix}spiders',
		array(
		)
	);
	list ($numSpiders) = $db->fetch_row($request);
	$db->free_result($request);

	// Hand back a real integer, as documented, whatever type the fetched column came back as.
	return (int) $numSpiders;
}
362
363
/**
 * Retrieve spider logs within the specified limits.
 *
 * - (used by createList() callbacks)
 * - The limits are bound as integers and the sort expression goes through the
 *   raw placeholder, the same way getSpiders() builds its query.
 *
 * @package SearchEngines
 * @param int $start The item to start with (for pagination purposes)
 * @param int $items_per_page The number of items to show per page
 * @param string $sort A string indicating how to sort the results; it is inserted as-is in the
 *                     ORDER BY, so it must never come straight from user input
 * @return array An array of spider hits (id_spider, url, log_time, spider_name)
 */
function getSpiderLogs($start, $items_per_page, $sort)
{
	$db = database();

	$request = $db->query('', '
		SELECT sl.id_spider, sl.url, sl.log_time, s.spider_name
		FROM {db_prefix}log_spider_hits AS sl
			INNER JOIN {db_prefix}spiders AS s ON (s.id_spider = sl.id_spider)
		ORDER BY {raw:sort}
		LIMIT {int:start}, {int:limit}',
		array(
			'sort' => $sort,
			'start' => $start,
			'limit' => $items_per_page,
		)
	);
	$spider_logs = array();
	while ($row = $db->fetch_assoc($request))
		$spider_logs[] = $row;
	$db->free_result($request);

	return $spider_logs;
}
394
395
/**
 * Returns the count of spider logs.
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @return int The number of rows in the log_spider_hits table
 */
function getNumSpiderLogs()
{
	$db = database();

	$request = $db->query('', '
		SELECT COUNT(*) AS num_logs
		FROM {db_prefix}log_spider_hits',
		array(
		)
	);
	list ($numLogs) = $db->fetch_row($request);
	$db->free_result($request);

	// Hand back a real integer, as documented, whatever type the fetched column came back as.
	return (int) $numLogs;
}
417
418
/**
 * Get a list of spider stats from the log_spider_stats table within the specified
 * limits.
 * (used by createList() callbacks)
 *
 * The limits are bound as integers and the sort expression goes through the
 * raw placeholder, the same way getSpiders() builds its query.
 *
 * @package SearchEngines
 * @param int $start The item to start with (for pagination purposes)
 * @param int $items_per_page The number of items to show per page
 * @param string $sort A string indicating how to sort the results; it is inserted as-is in the
 *                     ORDER BY, so it must never come straight from user input
 * @return array An array of stat rows (id_spider, stat_date, page_hits, spider_name)
 */
function getSpiderStats($start, $items_per_page, $sort)
{
	$db = database();

	$request = $db->query('', '
		SELECT ss.id_spider, ss.stat_date, ss.page_hits, s.spider_name
		FROM {db_prefix}log_spider_stats AS ss
			INNER JOIN {db_prefix}spiders AS s ON (s.id_spider = ss.id_spider)
		ORDER BY {raw:sort}
		LIMIT {int:start}, {int:limit}',
		array(
			'sort' => $sort,
			'start' => $start,
			'limit' => $items_per_page,
		)
	);
	$spider_stats = array();
	while ($row = $db->fetch_assoc($request))
		$spider_stats[] = $row;
	$db->free_result($request);

	return $spider_stats;
}
448
449
/**
 * Get the number of spider stat rows from the log spider stats table
 * (used by createList() callbacks)
 *
 * @package SearchEngines
 * @param string|null $time (optional) if specified counts only the entries before that date.
 *                          NOTE(review): the value is bound with the {date:} placeholder, so
 *                          presumably a YYYY-MM-DD string is expected - confirm against callers.
 * @return int The number of rows in the log_spider_stats table
 */
function getNumSpiderStats($time = null)
{
	$db = database();

	// Only bind the date when the WHERE clause that uses it is actually part of the query.
	$request = $db->query('', '
		SELECT COUNT(*)
		FROM {db_prefix}log_spider_stats' . ($time === null ? '' : '
		WHERE stat_date < {date:date_being_viewed}'),
		$time === null ? array() : array(
			'date_being_viewed' => $time,
		)
	);
	list ($numStats) = $db->fetch_row($request);
	$db->free_result($request);

	// Hand back a real integer, as documented, whatever type the fetched column came back as.
	return (int) $numStats;
}
474
475
/**
 * Purge the spider hits logged before the given moment.
 *
 * Deletes every log_spider_hits row whose log_time is lower than $time.
 *
 * @package SearchEngines
 * @param int $time a time value
 */
function removeSpiderOldLogs($time)
{
	$db = database();

	$query_params = array(
		'delete_period' => $time,
	);

	// Delete the entries.
	$db->query('', '
		DELETE FROM {db_prefix}log_spider_hits
		WHERE log_time < {int:delete_period}',
		$query_params
	);
}
494
495
/**
 * Purge the spider stats last touched before the given moment.
 *
 * Deletes every log_spider_stats row whose last_seen is lower than $time.
 *
 * @package SearchEngines
 * @param int $time a time value
 */
function removeSpiderOldStats($time)
{
	$db = database();

	$query_params = array(
		'delete_period' => $time,
	);

	// Delete the entries.
	$db->query('', '
		DELETE FROM {db_prefix}log_spider_stats
		WHERE last_seen < {int:delete_period}',
		$query_params
	);
}
514
515
/**
 * Remove all the entries connected to a certain spider (description, entries, stats)
 *
 * Deletes the given ids from the spiders, log_spider_hits and log_spider_stats tables.
 * Nothing is done when the list is empty.
 *
 * @package SearchEngines
 * @param int[] $spiders_id an array of spider ids
 */
function removeSpiders($spiders_id)
{
	// Nothing to remove, and an empty list would only build an IN () with nothing in it.
	if (empty($spiders_id))
		return;

	$db = database();

	$db->query('', '
		DELETE FROM {db_prefix}spiders
		WHERE id_spider IN ({array_int:remove_list})',
		array(
			'remove_list' => $spiders_id,
		)
	);
	$db->query('', '
		DELETE FROM {db_prefix}log_spider_hits
		WHERE id_spider IN ({array_int:remove_list})',
		array(
			'remove_list' => $spiders_id,
		)
	);
	$db->query('', '
		DELETE FROM {db_prefix}log_spider_stats
		WHERE id_spider IN ({array_int:remove_list})',
		array(
			'remove_list' => $spiders_id,
		)
	);
}
547
548
/**
 * Find out when each spider was last seen.
 *
 * Takes the highest last_seen value per spider from log_spider_stats.
 *
 * @package SearchEngines
 * @return array the last seen values keyed by id_spider
 */
function spidersLastSeen()
{
	$db = database();

	$result = $db->query('', '
		SELECT id_spider, MAX(last_seen) AS last_seen_time
		FROM {db_prefix}log_spider_stats
		GROUP BY id_spider',
		array()
	);

	// One entry per spider.
	$last_seen_by_id = array();
	while ($stat = $db->fetch_assoc($result))
	{
		$last_seen_by_id[$stat['id_spider']] = $stat['last_seen_time'];
	}
	$db->free_result($result);

	return $last_seen_by_id;
}
572
573
/**
 * Build the list of months between the oldest and the newest spider stat.
 *
 * Each entry is keyed by the year immediately followed by the month number
 * (no padding) and holds the short month name followed by the year.
 *
 * @package SearchEngines
 * @return array the month choices, oldest first
 */
function spidersStatsDates()
{
	global $txt;

	$db = database();

	// Get the earliest and latest dates.
	$result = $db->query('', '
		SELECT MIN(stat_date) AS first_date, MAX(stat_date) AS last_date
		FROM {db_prefix}log_spider_stats',
		array()
	);
	list ($oldest, $newest) = $db->fetch_row($result);
	$db->free_result($result);

	// Dates are read as YYYY-MM-..., so the year is at offset 0 and the month at offset 5.
	$first_year = (int) substr($oldest, 0, 4);
	$first_month = (int) substr($oldest, 5, 2);
	$last_year = (int) substr($newest, 0, 4);
	$last_month = (int) substr($newest, 5, 2);

	// Prepare the dates for the drop down.
	$choices = array();
	for ($year = $first_year; $year <= $last_year; $year++)
	{
		// The first year starts at its first recorded month, the last one stops at its last recorded month.
		$from = $year == $first_year ? max(1, $first_month) : 1;
		$to = $year == $last_year ? min(12, $last_month) : 12;

		for ($month = $from; $month <= $to; $month++)
		{
			$choices[$year . $month] = $txt['months_short'][$month] . ' ' . $year;
		}
	}

	return $choices;
}
616
617
/**
 * Save a spider definition.
 *
 * An empty $id adds a new row to the spiders table, anything else updates
 * the row having that id.
 *
 * @package SearchEngines
 * @param int $id id of the spider to change, empty to add a new one
 * @param string $name spider name
 * @param string $agent ua of the spider
 * @param string $info_ip stored in the ip_info column
 */
function updateSpider($id = 0, $name = '', $agent = '', $info_ip = '')
{
	$db = database();

	// Existing spider update
	if (!empty($id))
	{
		$db->query('', '
			UPDATE {db_prefix}spiders
			SET spider_name = {string:spider_name}, user_agent = {string:spider_agent},
				ip_info = {string:ip_info}
			WHERE id_spider = {int:current_spider}',
			array(
				'current_spider' => $id,
				'spider_name' => $name,
				'spider_agent' => $agent,
				'ip_info' => $info_ip,
			)
		);
	}
	// New spider, insert
	else
	{
		$columns = array(
			'spider_name' => 'string',
			'user_agent' => 'string',
			'ip_info' => 'string',
		);
		$values = array($name, $agent, $info_ip);

		$db->insert('insert', '{db_prefix}spiders', $columns, $values, array('id_spider'));
	}
}