1
|
|
|
<?php |
2
|
|
|
namespace vipnytt\RobotsTxtParser\Core\Directives; |
3
|
|
|
|
4
|
|
|
use vipnytt\RobotsTxtParser\Core\RobotsTxtInterface; |
5
|
|
|
use vipnytt\RobotsTxtParser\Core\Toolbox; |
6
|
|
|
use vipnytt\RobotsTxtParser\Core\UrlParser; |
7
|
|
|
use vipnytt\RobotsTxtParser\Exceptions; |
8
|
|
|
|
9
|
|
|
/**
 * Class DisAllow
 *
 * Holds the rules of a single `Allow` or `Disallow` directive: plain paths
 * plus the inline `Clean-param` and `Host` sub-directives, and answers
 * whether a given URL is covered by any of them.
 *
 * @package vipnytt\RobotsTxtParser\Core\Directives
 */
class DisAllow implements DirectiveInterface, RobotsTxtInterface
{
    use Toolbox;
    use UrlParser;

    /**
     * Directive alternatives
     *
     * Directive names this class may be constructed with.
     */
    const DIRECTIVE = [
        self::DIRECTIVE_ALLOW,
        self::DIRECTIVE_DISALLOW,
    ];

    /**
     * Sub directives white list
     *
     * Sub-directives that are recognized inside a rule line and routed to
     * their own handler instead of being stored as a plain path.
     */
    const SUB_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
    ];

    /**
     * Directive
     *
     * The validated directive name; used as the export key and as the
     * prefix of every rendered line.
     *
     * @var string
     */
    protected $directive;

    /**
     * Rule array
     *
     * Plain path rules are collected under the `path` key.
     *
     * @var array
     */
    protected $array = [];

    /**
     * Sub-directive Clean-param
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Sub-directive Host
     * @var Host
     */
    protected $host;

    /**
     * DisAllow constructor
     *
     * @param string $directive One of the names listed in self::DIRECTIVE
     */
    public function __construct($directive)
    {
        // NOTE(review): validateDirective() is not defined in this file (presumably
        // provided by the Toolbox trait); its behavior on an unknown name is not visible here.
        $this->directive = $this->validateDirective($directive, self::DIRECTIVE);
        $this->cleanParam = new CleanParam();
        $this->host = new Host();
    }

    /**
     * Add
     *
     * Routes the line to the matching sub-directive handler, or stores it as
     * a plain path when it is not a white-listed sub-directive.
     *
     * @param string $line
     * @return bool Result of the handler that received the line
     */
    public function add($line)
    {
        $pair = $this->generateRulePair($line, self::SUB_DIRECTIVES);
        switch ($pair['directive']) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($pair['value']);
            case self::DIRECTIVE_HOST:
                return $this->host->add($pair['value']);
        }
        // Not a sub-directive: the untouched line itself is the path rule
        return $this->addPath($line);
    }

    /**
     * Add plain path to allow/disallow
     *
     * @param string $rule
     * @return bool False if the exact same rule is already stored, true once added
     */
    protected function addPath($rule)
    {
        // Strict comparison: loosely compared, distinct numeric-looking strings
        // (e.g. "1" and "01") would be treated as duplicates and silently dropped.
        if (isset($this->array['path']) && in_array($rule, $this->array['path'], true)) {
            return false;
        }
        $this->array['path'][] = $rule;
        return true;
    }

    /**
     * Check
     *
     * @param string $url
     * @return bool True if a path rule, the Clean-param or the Host sub-directive covers the URL
     * @throws Exceptions\ClientException If the URL is neither a relative path nor a valid URL
     */
    public function check($url)
    {
        $path = $this->getPath($url);
        // Defensive guard: getPath() as implemented here never returns false,
        // but it is protected and may be overridden by a subclass.
        return ($path === false) ? false : (
            $this->checkPath($path, isset($this->array['path']) ? $this->array['path'] : []) ||
            $this->cleanParam->check($path) ||
            $this->host->check($url)
        );
    }

    /**
     * Get path and query
     *
     * Reduces the URL to the part the rules are matched against: the path
     * followed by `?query` when a query string is present.
     *
     * @param string $url
     * @return string
     * @throws Exceptions\ClientException
     */
    protected function getPath($url)
    {
        // Encode
        $url = $this->urlEncode($url);
        if (strpos($url, '/') === 0) {
            // Already a relative reference; only the fragment has to be stripped
            return explode('#', $url, 2)[0];
        }
        if (!$this->urlValidate($url)) {
            throw new Exceptions\ClientException('Invalid URL');
        }
        // parse_url() yields null for an absent component and false for a
        // seriously malformed URL; only a string is a usable component.
        $path = parse_url($url, PHP_URL_PATH);
        $query = parse_url($url, PHP_URL_QUERY);
        return (is_string($path) ? $path : '/') . (is_string($query) ? '?' . $query : '');
    }

    /**
     * Export rules
     *
     * @return array Empty if no rules exist, otherwise all rules keyed by the directive name
     */
    public function export()
    {
        $result = array_merge(
            $this->array,
            $this->cleanParam->export(),
            $this->host->export()
        );
        return empty($result) ? [] : [$this->directive => $result];
    }

    /**
     * Render
     *
     * @return string[] One `<directive>:<value>` line per stored rule
     */
    public function render()
    {
        $result = [];
        $render = array_merge(
            $this->array,
            $this->cleanParam->render(),
            $this->host->render()
        );
        foreach ($render as $value) {
            if (is_array($value)) {
                // Grouped rules (such as the `path` list) produce one line per entry
                foreach ($value as $path) {
                    $result[] = $this->directive . ':' . $path;
                }
                continue;
            }
            $result[] = $this->directive . ':' . $value;
        }
        return $result;
    }
}
184
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.