1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* vipnytt/RobotsTxtParser |
4
|
|
|
* |
5
|
|
|
* @link https://github.com/VIPnytt/RobotsTxtParser |
6
|
|
|
* @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT) |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
namespace vipnytt\RobotsTxtParser\Client\Directives; |
10
|
|
|
|
11
|
|
|
use DateTime; |
12
|
|
|
use DateTimeZone; |
13
|
|
|
use vipnytt\RobotsTxtParser\Handler\ErrorHandler; |
14
|
|
|
|
15
|
|
|
/** |
16
|
|
|
* Trait DirectiveClientTrait |
17
|
|
|
* |
18
|
|
|
* @package vipnytt\RobotsTxtParser\Client\Directives |
19
|
|
|
*/ |
20
|
|
|
trait DirectiveClientTrait
{
    /**
     * Is time between
     *
     * Tells whether the moment described by $timestamp falls inside the daily
     * time window $fromTime .. $toTime (both boundaries inclusive), in UTC.
     *
     * The window boundaries are parsed with DateTime::createFromFormat(), which
     * fills every date field the format does not mention from the current
     * clock. With the default 'Hi' format the window is therefore placed on the
     * current UTC date, and $timestamp is expected to be close to "now".
     * A window whose start is later than its end (e.g. 2300 - 0200) is treated
     * as crossing midnight.
     *
     * @param int $timestamp Unix timestamp of the moment to test
     * @param string $fromTime Start of the window, written in $format
     * @param string $toTime End of the window, written in $format
     * @param string $format DateTime::createFromFormat() format of the boundaries
     * @return bool True when inside the window; false when outside, or when any
     *              of the three values cannot be parsed
     */
    private function isBetween($timestamp, $fromTime, $toTime, $format = 'Hi')
    {
        $timezone = new DateTimeZone('UTC');
        $dtRef = DateTime::createFromFormat('U', (string)$timestamp, $timezone);
        $dtFrom = DateTime::createFromFormat($format, $fromTime, $timezone);
        $dtTo = DateTime::createFromFormat($format, $toTime, $timezone);
        if ($dtRef === false || $dtFrom === false || $dtTo === false) {
            // createFromFormat() signals a parse failure with false. Comparing
            // false against a DateTime silently yields a result, and calling
            // modify() on it is a fatal error, so unparsable input is rejected.
            return false;
        }
        if ($dtFrom > $dtTo) {
            // The window crosses midnight: its end belongs to the following day
            $dtTo->modify('+1 day');
        }
        if ($dtFrom <= $dtRef && $dtRef <= $dtTo) {
            return true;
        }
        // A reference time after midnight may belong to a window that started
        // the day before; shift it one day forward and test once more.
        $dtRef->modify('+1 day');
        return $dtFrom <= $dtRef && $dtRef <= $dtTo;
    }

    /**
     * Check path rule
     *
     * Tests $path against every rule in $paths and stops at the first rule
     * that matches.
     *
     * @param string $path Path to test
     * @param string[] $paths Rule paths written in robots.txt syntax
     * @return bool True as soon as one rule matches, false when none does
     */
    private function checkPaths($path, array $paths)
    {
        // robots.txt syntax => regular expression syntax. `?` and `.` are
        // literal characters in a rule, `*` matches any sequence of characters.
        $translation = [
            '?' => '\?',
            '.' => '\.',
            '*' => '.*',
        ];
        foreach ($paths as $rule) {
            if ($this->checkPathsCallback(strtr($rule, $translation), $path)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Callback for CheckPath
     *
     * Matches $path against a single rule. A rule without an end anchor (`$`)
     * matches when the pattern is found anywhere in the path. A rule with an
     * end anchor must describe the path as a whole, from its first to its last
     * character.
     *
     * Warnings and notices raised while matching (for instance by a rule that
     * does not form a valid pattern) are routed to an ErrorHandler instance for
     * the duration of the call; such a rule is reported as not matching.
     *
     * @param string $rule Rule, already translated to regular expression
     *                     syntax the way checkPaths() does it
     * @param string $path Path to test
     * @return bool
     */
    private function checkPathsCallback($rule, $path)
    {
        /**
         * Warning: preg_match need to be replaced
         *
         * Bug report
         * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/59
         *
         * A robots.txt parser, where a bug-fix is/was planned
         * @link https://github.com/diggin/Diggin_RobotRules
         *
         * References:
         * @link https://github.com/diggin/Diggin_RobotRules/blob/d5fe3c7a41be28dcd20fafee3ed4297dbc9e0378/README.markdown
         * @link https://github.com/diggin/Diggin_RobotRules/commit/d5fe3c7a41be28dcd20fafee3ed4297dbc9e0378#diff-0a369498a5a8db3ac8fa606b544c9810R19
         *
         * The solution?
         * PHP PEG (parsing expression grammar)
         * @link https://github.com/hafriedlander/php-peg
         */
        $errorHandler = new ErrorHandler();
        set_error_handler([$errorHandler, 'callback'], E_NOTICE | E_WARNING);
        try {
            if (mb_strpos($rule, '$') === false) {
                // No end anchor: the pattern may match anywhere in the path
                $pattern = '#' . $rule . '#';
            } else {
                // End anchor: let the regular expression decide on the whole
                // path. The rule arrives regex-escaped, so comparing its text
                // with the raw path (as was done before) rejected every
                // anchored rule containing `.`, `?` or a trailing `*`.
                // The D modifier keeps `$` from matching before a final newline.
                $pattern = '#^' . $rule . '#D';
            }
            // preg_match() returns 0 for "no match" and false on error
            return preg_match($pattern, $path) === 1;
        } finally {
            // Hand error handling back to the caller on every exit path
            restore_error_handler();
        }
    }
}
124
|
|
|
|