1
|
|
|
<?php |
2
|
|
|
namespace vipnytt\RobotsTxtParser\Directives; |
3
|
|
|
|
4
|
|
|
use vipnytt\RobotsTxtParser\ObjectTools; |
5
|
|
|
use vipnytt\RobotsTxtParser\RobotsTxtInterface; |
6
|
|
|
|
7
|
|
|
/**
 * Class UserAgent
 *
 * Keeps one Allow, Cache-delay, Crawl-delay and Disallow rule object per
 * user agent and routes incoming rule lines to the user agents that are
 * currently selected.
 *
 * @package vipnytt\RobotsTxtParser\Directives
 */
class UserAgent implements DirectiveInterface, RobotsTxtInterface
{
    use ObjectTools;

    /**
     * Directives accepted by add()
     */
    const SUB_DIRECTIVES = [
        self::DIRECTIVE_ALLOW,
        self::DIRECTIVE_CACHE_DELAY,
        self::DIRECTIVE_CRAWL_DELAY,
        self::DIRECTIVE_DISALLOW,
    ];

    /**
     * Directive
     */
    const DIRECTIVE = 'User-agent';

    /**
     * User agents currently selected; add() applies rules to each of them
     * @var array
     */
    protected $userAgent = [];

    /**
     * Every user agent registered so far, in order of first appearance
     * @var array
     */
    protected $userAgents = [];

    /**
     * Value handed to the constructor
     * @var mixed
     */
    protected $parent;

    /**
     * Not used by any method of this class
     * @var array
     */
    protected $array = [];

    /**
     * Allow rule objects, keyed by user agent
     * @var array
     */
    protected $allow = [];

    /**
     * Cache-delay rule objects, keyed by user agent
     * @var array
     */
    protected $cacheDelay = [];

    /**
     * Crawl-delay rule objects, keyed by user agent
     * @var array
     */
    protected $crawlDelay = [];

    /**
     * Disallow rule objects, keyed by user agent
     * @var array
     */
    protected $disallow = [];

    /**
     * UserAgent constructor.
     *
     * Registers and selects the default user agent (self::USER_AGENT).
     *
     * @param mixed $parent - stored as-is in $this->parent
     */
    public function __construct($parent = null)
    {
        $this->parent = $parent;
        $this->set(self::USER_AGENT);
    }

    /**
     * Set
     *
     * Selects a user agent, creating its rule objects on first use.
     *
     * @param string $line - user agent name
     * @param bool $append - true to add to the current selection, false to replace it
     * @return bool - always true
     */
    public function set($line, $append = false)
    {
        if (!$append) {
            $this->userAgent = [];
        }
        $this->userAgent[] = $line;

        // Test the key of the map that is actually indexed below. A loose
        // in_array() would treat numeric-looking names such as "1e3" and
        // "1000" as equal and leave the second one without rule objects.
        if (!isset($this->allow[$line])) {
            $this->allow[$line] = new Allow(self::DIRECTIVE);
            $this->cacheDelay[$line] = new CacheDelay(self::DIRECTIVE);
            $this->crawlDelay[$line] = new CrawlDelay(self::DIRECTIVE);
            $this->disallow[$line] = new Disallow(self::DIRECTIVE);
            $this->userAgents[] = $line;
        }
        return true;
    }

    /**
     * Add
     *
     * Applies a rule line to every currently selected user agent.
     *
     * @param string $line
     * @return bool - result reported for the last selected user agent,
     *                false if the line holds no supported directive
     */
    public function add($line)
    {
        $pair = $this->generateRulePair($line, self::SUB_DIRECTIVES);

        // generateRulePair() is defined outside this class; treat a result
        // without a directive as "nothing to add".
        if (!isset($pair['directive'])) {
            return false;
        }

        $ruleSet = $this->ruleSetFor($pair['directive']);
        if ($ruleSet === null) {
            return false;
        }

        $result = false;
        foreach ($this->userAgent as $userAgent) {
            $result = $ruleSet[$userAgent]->add($pair['value']);
        }
        return $result;
    }

    /**
     * Rule objects (keyed by user agent) belonging to a sub-directive
     *
     * @param string $directive - one of self::SUB_DIRECTIVES
     * @return array|null - null if the directive is not supported
     */
    private function ruleSetFor($directive)
    {
        switch ($directive) {
            case self::DIRECTIVE_ALLOW:
                return $this->allow;
            case self::DIRECTIVE_CACHE_DELAY:
                return $this->cacheDelay;
            case self::DIRECTIVE_CRAWL_DELAY:
                return $this->crawlDelay;
            case self::DIRECTIVE_DISALLOW:
                return $this->disallow;
        }
        return null;
    }

    /**
     * Check rules
     *
     * Disallow is evaluated first and Allow second, so a matching Allow
     * rule overrides a matching Disallow rule.
     *
     * @param string $url - URL to check
     * @param string $type - directive to check
     * @param string $userAgent - user agent whose rules apply; an unknown
     *                            name falls back to self::USER_AGENT
     * @return bool
     */
    public function check($url, $type, $userAgent = self::USER_AGENT)
    {
        // The rule properties are arrays keyed by user agent, so a single
        // agent has to be picked before a rule object can be queried.
        if (!isset($this->allow[$userAgent])) {
            $userAgent = self::USER_AGENT;
        }

        $result = ($type === self::DIRECTIVE_ALLOW);
        foreach ([self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW] as $directive) {
            $ruleSet = $this->ruleSetFor($directive);
            if ($ruleSet[$userAgent]->check($url)) {
                $result = ($type === $directive);
            }
        }
        return $result;
    }

    /**
     * Export
     *
     * Collects the exported rules of every registered user agent. User
     * agents without any rules are left out.
     *
     * @return array - [self::DIRECTIVE => [userAgent => rules]], or an
     *                 empty array if no user agent has rules
     */
    public function export()
    {
        $result = [];
        foreach ($this->userAgents as $userAgent) {
            // Array union: on a key collision the left-hand export wins.
            $current = $this->allow[$userAgent]->export()
                + $this->cacheDelay[$userAgent]->export()
                + $this->crawlDelay[$userAgent]->export()
                + $this->disallow[$userAgent]->export();
            if (!empty($current)) {
                $result[$userAgent] = $current;
            }
        }
        return empty($result) ? [] : [self::DIRECTIVE => $result];
    }
}
128
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.