Completed
Branch 2.0-dev (2690fb)
by Jan-Petter
02:15
created

RobotsTxtParser::add()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 18
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 18
rs 9.2
cc 4
eloc 14
nc 4
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser;
3
4
use vipnytt\RobotsTxtParser\Parser\Directives\DirectiveParserCommons;
5
use vipnytt\RobotsTxtParser\Parser\Directives\RootDirectiveHandler;
6
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
7
8
/**
9
 * Class RobotsTxtParser
10
 *
11
 * @package vipnytt\RobotsTxtParser\Parser
12
 */
13
class RobotsTxtParser implements RobotsTxtInterface
{
    use DirectiveParserCommons;
    use UrlParser;

    /**
     * Directive white list
     *
     * Maps each top-level directive name to the name of the handler method
     * that is called (see add()) to obtain the object receiving its values.
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM => 'cleanParam',
        self::DIRECTIVE_HOST => 'host',
        self::DIRECTIVE_SITEMAP => 'sitemap',
        self::DIRECTIVE_USER_AGENT => 'userAgent',
    ];

    /**
     * Root directive handler
     * @var RootDirectiveHandler
     */
    protected $handler;

    /**
     * Current user-agent(s)
     *
     * Values of the consecutive user-agent lines seen most recently.
     * Stays null until the first user-agent line has been added.
     *
     * @var array
     */
    private $userAgents;

    /**
     * Previous directive
     *
     * Holds the directive name of the last line passed to add() when that
     * line was a top-level directive, otherwise the raw line itself.
     *
     * @var string
     */
    private $previousDirective;

    /**
     * RobotsTxtParser constructor.
     *
     * Sets the mbstring internal encoding to self::ENCODING (a process-wide
     * setting), creates the root handler for the encoded base URI and then
     * parses the supplied content.
     *
     * @param string $baseUri
     * @param string $content
     */
    public function __construct($baseUri, $content)
    {
        mb_internal_encoding(self::ENCODING);
        // NOTE(review): urlBase()/urlEncode() are presumably supplied by the UrlParser trait.
        $this->handler = new RootDirectiveHandler($this->urlBase($this->urlEncode($baseUri)));
        $this->parseTxt($content);
    }

    /**
     * Parse robots.txt
     *
     * Splits the text into lines, truncates each one to
     * self::MAX_LENGTH_RULE characters, strips the comment part and hands
     * every line that still has content to add().
     *
     * @param string $txt
     * @return void
     */
    private function parseTxt($txt)
    {
        // Trim every line and drop the blank ones. Note that array_filter()
        // without a callback also drops a line consisting solely of "0".
        $lines = array_filter(array_map('trim', mb_split('\r\n|\n|\r', $txt)));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments
            $line = mb_split('#', $line, 2)[0];
            // A comment-only line carries no directive. Passing it to add()
            // would overwrite the previous directive, so a comment placed
            // between two user-agent lines would wrongly split their group.
            if (trim($line) === '') {
                continue;
            }
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add line
     *
     * Routes a single line to the matching handler:
     * - a line that is not a top-level directive is forwarded unchanged to
     *   the user-agent handler;
     * - a user-agent line extends the current user-agent list when the
     *   previous line was a user-agent line too, otherwise it starts a new
     *   list; the list is then set on the user-agent handler;
     * - any other top-level directive has its value added to the handler
     *   named in TOP_LEVEL_DIRECTIVES.
     *
     * @param string $line
     * @return bool Whatever the invoked handler method returns.
     */
    public function add($line)
    {
        // Remember the state before this line; it decides below whether a
        // user-agent line continues the current list or starts a new one.
        $previousDirective = $this->previousDirective;
        $pair = $this->generateRulePair($line, array_keys(self::TOP_LEVEL_DIRECTIVES));
        if ($pair === false) {
            // Not a top-level directive: the raw line is stored, so a
            // following user-agent line starts a new list.
            $this->previousDirective = $line;
            return $this->handler->userAgent()->add($line);
        }
        $this->previousDirective = $pair['directive'];
        if ($pair['directive'] === self::DIRECTIVE_USER_AGENT) {
            if ($previousDirective !== self::DIRECTIVE_USER_AGENT) {
                $this->userAgents = [];
            }
            $this->userAgents[] = $pair['value'];
            return $this->handler->userAgent()->set($this->userAgents);
        }
        return $this->handler->{self::TOP_LEVEL_DIRECTIVES[$pair['directive']]}()->add($pair['value']);
    }

    /**
     * Render
     *
     * Joins the rendered lines of the host, clean-param, sitemap and
     * user-agent handlers, in that order, into one string.
     *
     * @param string $lineSeparator
     * @return string
     */
    public function render($lineSeparator = "\n")
    {
        return implode($lineSeparator, array_merge(
            $this->handler->host()->render(),
            $this->handler->cleanParam()->render(),
            $this->handler->sitemap()->render(),
            $this->handler->userAgent()->render()
        ));
    }

    /**
     * Export rules
     *
     * Merges the rules of the host, clean-param, sitemap and user-agent
     * handlers, in that order, with array_merge().
     *
     * @return array
     */
    public function export()
    {
        return array_merge(
            $this->handler->host()->getRules(),
            $this->handler->cleanParam()->getRules(),
            $this->handler->sitemap()->getRules(),
            $this->handler->userAgent()->getRules()
        );
    }
}
135