|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Bee4\RobotsTxt; |
|
4
|
|
|
|
|
5
|
|
|
use Bee4\RobotsTxt\Exception\RuntimeException; |
|
6
|
|
|
|
|
7
|
|
|
/** |
|
8
|
|
|
* Class Parser |
|
9
|
|
|
* Take the content of a robots.txt file and transform it to rules |
|
10
|
|
|
* |
|
11
|
|
|
* @copyright Bee4 2015 |
|
12
|
|
|
* @author Stephane HULARD <[email protected]> |
|
13
|
|
|
*/ |
|
14
|
|
|
class Parser
{
    /**
     * Transform file content to structured Rules
     *
     * The content is split on CR / LF characters (empty lines are dropped)
     * and each remaining line is inspected in order:
     *  - lines whose first character is `#` are skipped;
     *  - a `User-Agent: <name>` line stores the group collected so far (if
     *    any) and opens a new Rule for `<name>`;
     *  - `Allow: <pattern>` / `Disallow: <pattern>` lines are forwarded to
     *    the current Rule through `allow()` / `disallow()`;
     *  - `Allow` / `Disallow` lines found before any `User-Agent` line are
     *    ignored because there is no Rule to attach them to;
     *  - any other line is ignored.
     *
     * Directive names are matched case-insensitively and must be followed by
     * a colon and exactly one space.
     *
     * @param string|Content $content Raw robots.txt text or a Content instance
     * @return Rules
     * @throws RuntimeException When $content is neither a string nor a Content
     */
    public static function parse($content)
    {
        if (is_string($content)) {
            $content = new Content($content);
        }
        if (!($content instanceof Content)) {
            throw new RuntimeException(
                'You must use a `string` or a `Content` instance to the `Parser`!'
            );
        }

        $rules = new Rules();
        $userAgent = $rule = null;

        // Split once up front instead of iterating with strtok(): strtok()
        // keeps a single hidden cursor, so any strtok() call made by the
        // collaborators inside the loop would corrupt the iteration.
        // PREG_SPLIT_NO_EMPTY mirrors strtok() skipping empty tokens.
        $lines = preg_split('/[\r\n]+/', (string) $content->get(), -1, PREG_SPLIT_NO_EMPTY);
        if ($lines === false) {
            $lines = array();
        }

        foreach ($lines as $line) {
            // Comment line: nothing to parse
            if (strpos($line, '#') === 0) {
                continue;
            }

            if (preg_match('/^User-Agent\: (.*)$/i', $line, $matches)) {
                // A new group starts: store the previous one before replacing it
                if ($userAgent !== null && $rule !== null) {
                    $rules->add($userAgent, $rule);
                }
                $userAgent = $matches[1];
                $rule = new Rule();
            } elseif ($rule === null) {
                // Directive before any User-Agent line: there is no Rule to
                // receive it, so skip it instead of calling a method on null.
                continue;
            } elseif (preg_match('/^Allow: (.*)$/i', $line, $matches)) {
                $rule->allow($matches[1]);
            } elseif (preg_match('/^Disallow: (.*)$/i', $line, $matches)) {
                $rule->disallow($matches[1]);
            }
        }

        // Handle the last group, which no following User-Agent line flushed
        if ($rule instanceof Rule) {
            $rules->add($userAgent, $rule);
        }

        return $rules;
    }
}
|
61
|
|
|
|