Completed
Pull Request — master (#2)
by Jan-Petter
02:43
created

RobotsTxtParser::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 6
rs 9.4285
cc 1
eloc 4
nc 1
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser;
3
4
use vipnytt\RobotsTxtParser\Parser\Directives\DirectiveParserCommons;
5
use vipnytt\RobotsTxtParser\Parser\Directives\RootDirectiveHandler;
6
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
7
8
/**
9
 * Class RobotsTxtParser
10
 *
11
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
12
 * @link https://yandex.com/support/webmaster/controlling-robot/robots-txt.xml
13
 * @link http://www.robotstxt.org/robotstxt.html
14
 * @link https://www.w3.org/TR/html4/appendix/notes.html#h-B.4.1.1
15
 * @link http://www.conman.org/people/spc/robots2.html
16
 *
17
 * @package vipnytt\RobotsTxtParser\Parser
18
 */
19
class RobotsTxtParser implements RobotsTxtInterface
{
    use DirectiveParserCommons;
    use UrlParser;

    /**
     * Directive white list
     *
     * Maps each top-level directive name to the name of the
     * RootDirectiveHandler method that returns the handler for it.
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM => 'cleanParam',
        self::DIRECTIVE_HOST => 'host',
        self::DIRECTIVE_SITEMAP => 'sitemap',
        self::DIRECTIVE_USER_AGENT => 'userAgent',
    ];

    /**
     * Root directive handler
     * @var RootDirectiveHandler
     */
    protected $handler;

    /**
     * Current user-agent(s), i.e. the values of the most recent run of
     * consecutive user-agent lines
     * @var array
     */
    private $userAgents = [];

    /**
     * Previous directive: the directive name of the last recognised top-level
     * rule, or the raw line when the last line was not a top-level rule
     * @var string|null
     */
    private $previousDirective;

    /**
     * Basic constructor.
     *
     * Sets the multibyte internal encoding (a process-wide setting), builds
     * the root handler from the encoded base of $baseUri and parses $content.
     *
     * @param string $baseUri
     * @param string $content
     */
    public function __construct($baseUri, $content)
    {
        mb_internal_encoding(self::ENCODING);
        $this->handler = new RootDirectiveHandler($this->urlBase($this->urlEncode($baseUri)));
        $this->parseTxt($content);
    }

    /**
     * Parse robots.txt
     *
     * Splits the content into lines, strips comments and feeds every
     * non-empty remainder to add().
     *
     * @param string $txt
     * @return void
     */
    private function parseTxt($txt)
    {
        // Split on any line ending, trim each line and drop the empty ones
        $lines = array_filter(array_map('trim', mb_split('\r\n|\n|\r', $txt)));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments, together with the whitespace that preceded them
            $line = trim(mb_split('#', $line, 2)[0]);
            if ($line === '') {
                // Comment-only line. Passing it on would overwrite the previous
                // directive with an empty string and thereby split a run of
                // consecutive user-agent lines into separate groups.
                continue;
            }
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add line
     *
     * Lines that are not one of the top-level directives are forwarded to the
     * user-agent handler. Consecutive user-agent lines are collected into one
     * group; any other line in between starts a new group.
     *
     * @param string $line
     * @return bool Result reported by the handler the line was passed to
     */
    public function add($line)
    {
        // Remember what preceded this line before anything else is evaluated
        $previousDirective = $this->previousDirective;
        // Either false, or an array with the keys 'directive' and 'value'
        $pair = $this->generateRulePair($line, array_keys(self::TOP_LEVEL_DIRECTIVES));
        if ($pair === false) {
            // Not a top-level directive: hand it to the current user-agent group
            $this->previousDirective = $line;
            return $this->handler->userAgent()->add($line);
        }
        $directive = $pair['directive'];
        $this->previousDirective = $directive;
        if ($directive === self::DIRECTIVE_USER_AGENT) {
            if ($previousDirective !== self::DIRECTIVE_USER_AGENT) {
                // First user-agent line after something else: start a new group
                $this->userAgents = [];
            }
            $this->userAgents[] = $pair['value'];
            return $this->handler->userAgent()->set($this->userAgents);
        }
        // Dispatch to the handler method registered for this directive
        return $this->handler->{self::TOP_LEVEL_DIRECTIVES[$directive]}()->add($pair['value']);
    }

    /**
     * Render
     *
     * Joins the rendered lines of the host, clean-param, sitemap and
     * user-agent handlers, in that order.
     *
     * @param string $lineSeparator
     * @return string
     */
    public function render($lineSeparator = "\n")
    {
        return implode($lineSeparator, array_merge(
            $this->handler->host()->render(),
            $this->handler->cleanParam()->render(),
            $this->handler->sitemap()->render(),
            $this->handler->userAgent()->render()
        ));
    }

    /**
     * Rule array
     *
     * @return array Exported rules, keyed by directive name
     */
    public function export()
    {
        return [
            self::DIRECTIVE_HOST => $this->handler->host()->client()->export(),
            self::DIRECTIVE_CLEAN_PARAM => $this->handler->cleanParam()->client()->export(),
            self::DIRECTIVE_SITEMAP => $this->handler->sitemap()->client()->export(),
            self::DIRECTIVE_USER_AGENT => $this->handler->userAgent()->export(),
        ];
    }
}
141