|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Bee4\RobotsTxt; |
|
4
|
|
|
|
|
5
|
|
|
use Bee4\RobotsTxt\Exception\InvalidContentException; |
|
6
|
|
|
|
|
7
|
|
|
/** |
|
8
|
|
|
* Class Parser |
|
9
|
|
|
* Take the content of a robots.txt file and transform it to rules |
|
10
|
|
|
* |
|
11
|
|
|
* @copyright Bee4 2015 |
|
12
|
|
|
* @author Stephane HULARD <[email protected]> |
|
13
|
|
|
*/ |
|
14
|
|
|
class Parser
{
    /**
     * Transform file content to structured Rules.
     *
     * The content is split on CR / LF characters (empty lines are dropped)
     * and every remaining line is matched, case-insensitively, against the
     * supported directives: `User-Agent`, `Allow`, `Disallow` and `Sitemap`.
     * Lines whose first character is `#` are ignored.
     *
     * Each `User-Agent` line opens a new Rule; the previous Rule, if any, is
     * added to the result at that point, and the last one after the loop.
     * `Allow` / `Disallow` lines found before the first `User-Agent` line
     * have no Rule to attach to and are skipped instead of triggering a
     * method call on null.
     *
     * @param string|Content $content Raw robots.txt text or a Content wrapper
     * @return Rules
     * @throws InvalidContentException When $content is neither a string nor
     *                                 a Content instance
     */
    public static function parse($content)
    {
        if (is_string($content)) {
            $content = new Content($content);
        }
        if (!($content instanceof Content)) {
            throw (new InvalidContentException(
                'Content must be a `string` or a `Content` instance'
            ))->setContent($content);
        }

        $rules = new Rules();
        $rule = null;

        // Split on any run of CR / LF characters and drop empty tokens: this
        // yields the same tokens as strtok($text, "\r\n") without relying on
        // strtok's process-wide cursor, which any nested strtok() call made
        // by the collaborators used in the loop would silently reset.
        $lines = preg_split(
            '/[\r\n]+/',
            $content->get(),
            -1,
            PREG_SPLIT_NO_EMPTY
        );
        if ($lines === false) {
            $lines = [];
        }

        foreach ($lines as $line) {
            // Comment line: '#' must be the very first character
            if (strpos($line, '#') === 0) {
                continue;
            }

            if (preg_match('/^\s*User-Agent\: (.*)$/i', $line, $matches)) {
                // A new group starts: store the one being built, if any
                if ($rule instanceof Rule) {
                    $rules->add($rule);
                }
                $rule = new Rule($matches[1]);
            } elseif (preg_match('/^\s*Allow: (.*)$/i', $line, $matches)) {
                // Ignore directives that appear outside of any group
                if ($rule instanceof Rule) {
                    $rule->allow($matches[1]);
                }
            } elseif (preg_match('/^\s*Disallow: (.*)$/i', $line, $matches)) {
                // Ignore directives that appear outside of any group
                if ($rule instanceof Rule) {
                    $rule->disallow($matches[1]);
                }
            } elseif (preg_match('/^\s*Sitemap: (.*)$/i', $line, $matches)) {
                // Sitemaps are attached to the whole rule set, not to a group
                $rules->addSitemap($matches[1]);
            }
        }

        // Handle the last group, which no following User-Agent line flushed
        if ($rule instanceof Rule) {
            $rules->add($rule);
        }

        return $rules;
    }
}
|
63
|
|
|
|