1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Bee4\RobotsTxt; |
4
|
|
|
|
5
|
|
|
/**
 * Class Rule
 * Represent a ruleset inside a robots.txt file: a user agent together with
 * the allow / disallow expressions that apply to it.
 *
 * @copyright Bee4 2015
 * @author    Stephane HULARD <[email protected]>
 */
class Rule
{
    const COMPILED = 'compiled';
    const DIRTY = 'dirty';

    /**
     * User agent on which the rule applies
     * @var string
     */
    private $ua;

    /**
     * Rule status (compiled or dirty). A rule is dirty until compile() has
     * rebuilt the patterns from the current expressions.
     * @var string
     */
    private $state = self::DIRTY;

    /**
     * Expression collection with allow / disallow segments
     * @var array
     */
    private $exp = [
        'allow' => [],
        'disallow' => []
    ];

    /**
     * Compiled regex pattern with allow / disallow segments
     * @var array
     */
    private $patterns = [
        'allow' => '',
        'disallow' => ''
    ];

    /**
     * @param string $ua User agent on which the rule applies
     */
    public function __construct($ua)
    {
        $this->ua = $ua;
    }

    /**
     * Retrieve rule's UserAgent
     * @return string
     */
    public function getUserAgent()
    {
        return $this->ua;
    }

    /**
     * Add a pattern to match in the current rule by allowing
     * @param string $pattern
     * @return Rule
     */
    public function allow($pattern)
    {
        return $this->addExpression(new Expression($pattern), 'allow');
    }

    /**
     * Add a pattern to match in the current rule by disallowing
     * @param string $pattern
     * @return Rule
     */
    public function disallow($pattern)
    {
        return $this->addExpression(new Expression($pattern), 'disallow');
    }

    /**
     * Add an expression in the current rule and mark the rule as dirty so
     * the patterns are rebuilt on the next match
     * @param Expression $exp Expression to register
     * @param string $mode Expression mode (allow / disallow)
     * @return Rule
     */
    private function addExpression(Expression $exp, $mode)
    {
        $this->state = self::DIRTY;
        $this->exp[$mode][] = $exp;
        return $this;
    }

    /**
     * Compile expressions to a global pattern
     *
     * Each expression list is sorted in place, longest raw pattern first, and
     * joined as one alternation in which every expression has its own capture
     * group. match() relies on the sorted list and the capture groups sharing
     * the same order.
     *
     * @return boolean Always true once the patterns are up to date
     */
    private function compile()
    {
        if (self::COMPILED === $this->state) {
            return true;
        }

        $process = function (array &$patterns) {
            usort($patterns, function (Expression $a, Expression $b) {
                // usort expects an integer: negative when $a must come first.
                // Longest raw pattern first, hence $b - $a.
                return strlen($b->getRaw()) - strlen($a->getRaw());
            });

            // Expressions are converted to string by implode()
            return '/^(('.implode(')|(', $patterns).'))$/';
        };

        $this->patterns['allow'] = $process($this->exp['allow']);
        $this->patterns['disallow'] = $process($this->exp['disallow']);
        $this->state = self::COMPILED;

        return true;
    }

    /**
     * Check if the URL is allowed or not
     *
     * The URL is allowed when no disallow expression matches it. When both a
     * disallow and an allow expression match, the allow wins if its raw
     * pattern is at least as long as the disallow one.
     *
     * @param string $url
     * @return boolean
     */
    public function match($url)
    {
        $this->compile();

        // An empty expression list compiles to a pattern matching the empty
        // string, so the count() guards must stay in front of preg_match().
        $isDisallowed = 0 < count($this->exp['disallow']) &&
            1 === preg_match($this->patterns['disallow'], $url, $disallowed);
        if (!$isDisallowed) {
            return true;
        }

        $isAllowed = 0 < count($this->exp['allow']) &&
            1 === preg_match($this->patterns['allow'], $url, $allowed);
        if (!$isAllowed) {
            return false;
        }

        // Groups 0 and 1 hold the whole match, expression N is group N + 2.
        // NOTE(review): this assumes the string form of an Expression does
        // not contain capture groups of its own - confirm in Expression.
        $a = $this->lastFilledIndex($allowed);
        $d = $this->lastFilledIndex($disallowed);

        return
            strlen($this->exp['allow'][$a-2]->getRaw()) >=
            strlen($this->exp['disallow'][$d-2]->getRaw());
    }

    /**
     * Retrieve the last filled index in a given array
     *
     * An entry is filled when it is neither null nor the empty string, so a
     * captured "0" is kept.
     *
     * @param array $data
     * @return integer|null Null when no entry is filled
     */
    private function lastFilledIndex(array $data)
    {
        $last = null;
        foreach ($data as $index => $value) {
            if (null !== $value && '' !== $value) {
                $last = $index;
            }
        }

        return $last;
    }
}
156
|
|
|
|
This check looks for PHPDoc comments describing methods or function parameters that do not exist on the corresponding method or function.
Consider the following example. The parameter `$italy` is not defined by the method `finale(...)`. The most likely cause is that the parameter was removed, but the annotation was not.