Completed
Push — master ( a6d8dc...680f8e )
by Jan-Petter
03:57
created

Parser::render()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 9
rs 9.6666
cc 1
eloc 6
nc 1
nop 0
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Parser\CharacterEncodingConvert;
5
use vipnytt\RobotsTxtParser\Parser\Directives\CleanParam;
6
use vipnytt\RobotsTxtParser\Parser\Directives\Host;
7
use vipnytt\RobotsTxtParser\Parser\Directives\Sitemap;
8
use vipnytt\RobotsTxtParser\Parser\Directives\UserAgent;
9
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
10
use vipnytt\RobotsTxtParser\Parser\Toolbox;
11
12
/**
13
 * Class Parser
14
 *
15
 * @package vipnytt\RobotsTxtParser
16
 */
17
abstract class Parser implements RobotsTxtInterface
{
    use Toolbox;

    /**
     * Directive white list
     *
     * Directives handled directly by this class. The list is handed to
     * generateRulePair() in add(); any line whose directive is not one of
     * these is forwarded unchanged to the user-agent handler.
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
        self::DIRECTIVE_SITEMAP,
        self::DIRECTIVE_USER_AGENT,
    ];

    /**
     * Previous directive
     *
     * Directive of the line most recently passed to add(). Used to detect
     * consecutive user-agent lines, which are collected into one group.
     *
     * @var string
     */
    protected $previousDirective;

    /**
     * Current user-agent(s)
     *
     * Values of the current run of consecutive user-agent lines. Reset in
     * add() whenever a user-agent line follows a different directive.
     *
     * @var array
     */
    protected $userAgentValues;

    /**
     * Clean-param class
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Host class
     * @var Host
     */
    protected $host;

    /**
     * Sitemap class
     * @var Sitemap
     */
    protected $sitemap;

    /**
     * User-agent class
     * @var UserAgent
     */
    protected $userAgent;

    /**
     * Core constructor.
     *
     * Creates the directive handlers, converts the content to the internal
     * encoding, optionally truncates it to a byte limit and parses it.
     *
     * @param string $content - file content
     * @param string $encoding - character encoding
     * @param int|null $byteLimit - maximum of bytes to parse
     */
    public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT)
    {
        $this->cleanParam = new CleanParam();
        $this->host = new Host();
        $this->sitemap = new Sitemap();
        $this->userAgent = new UserAgent();
        $content = $this->convertEncoding($encoding, $content);
        // Only a positive integer limit truncates; anything else (e.g. null) disables the limit.
        if (is_int($byteLimit) && $byteLimit > 0) {
            $content = mb_strcut($content, 0, $byteLimit);
        }
        $this->parseTxt($content);
    }

    /**
     * Convert character encoding
     *
     * Sets the mbstring internal encoding to self::ENCODING and converts the
     * content to it. The content is returned unchanged when it is already
     * declared to be in that encoding, or when the conversion reports failure.
     *
     * @param string $encoding
     * @param string $content
     * @return string
     */
    protected function convertEncoding($encoding, $content)
    {
        mb_internal_encoding(self::ENCODING);
        if ($encoding == self::ENCODING) {
            return $content;
        }
        $convert = new CharacterEncodingConvert($content, $encoding, self::ENCODING);
        if (($result = $convert->auto()) !== false) {
            return $result;
        }
        // Conversion failed: fall back to the unconverted content.
        return $content;
    }

    /**
     * Parse robots.txt
     *
     * Splits the text into lines, drops lines that are empty after trimming,
     * caps each line at self::MAX_LENGTH_RULE characters, strips everything
     * from the first '#' onwards and passes the remainder to add().
     *
     * @param string $txt
     * @return void
     */
    private function parseTxt($txt)
    {
        $rawLines = mb_split('\r\n|\n|\r', $txt);
        if ($rawLines === false) {
            // mb_split() returns false on failure. The delimiters are plain
            // ASCII, so a byte-wise split is used as a fallback.
            // NOTE(review): assumes self::ENCODING is ASCII-compatible (e.g. UTF-8) - confirm.
            $rawLines = preg_split('/\r\n|\n|\r/', $txt);
        }
        if (!is_array($rawLines)) {
            // Both splitters failed; treat the whole text as a single line.
            $rawLines = [$txt];
        }
        $lines = array_filter(array_map('trim', $rawLines));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments
            $parts = mb_split('#', $line, 2);
            if ($parts === false) {
                // Same failure mode as above: fall back to a byte-wise split.
                $parts = explode('#', $line, 2);
            }
            $line = $parts[0];
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add line
     *
     * Routes a single line to the handler of its directive. Consecutive
     * user-agent lines are accumulated and set on the user-agent handler as
     * one group. Lines that do not carry a top-level directive are passed
     * as-is to the user-agent handler.
     *
     * @param string $line
     * @return bool
     */
    public function add($line)
    {
        $previousDirective = $this->previousDirective;
        $pair = $this->generateRulePair($line, self::TOP_LEVEL_DIRECTIVES);
        if ($pair['directive'] === self::DIRECTIVE_USER_AGENT) {
            // A user-agent line that does not directly follow another one starts a new group.
            if ($previousDirective !== self::DIRECTIVE_USER_AGENT) {
                $this->userAgentValues = [];
            }
            $this->userAgentValues[] = $pair['value'];
        }
        $this->previousDirective = $pair['directive'];
        switch ($pair['directive']) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($pair['value']);
            case self::DIRECTIVE_HOST:
                return $this->host->add($pair['value']);
            case self::DIRECTIVE_SITEMAP:
                return $this->sitemap->add($pair['value']);
            case self::DIRECTIVE_USER_AGENT:
                return $this->userAgent->set($this->userAgentValues);
        }
        // Not a top-level directive: let the user-agent handler interpret the raw line.
        return $this->userAgent->add($line);
    }

    /**
     * Render
     *
     * Joins the rendered lines of all handlers with CRLF, in the order
     * clean-param, host, sitemap, user-agent.
     *
     * @return string
     */
    public function render()
    {
        return implode("\r\n", array_merge(
            $this->cleanParam->render(),
            $this->host->render(),
            $this->sitemap->render(),
            $this->userAgent->render()
        ));
    }

    /**
     * Export rules
     *
     * Merges the exported arrays of all handlers, in the order clean-param,
     * host, sitemap, user-agent.
     *
     * @return array
     */
    public function export()
    {
        return array_merge(
            $this->cleanParam->export(),
            $this->host->export(),
            $this->sitemap->export(),
            $this->userAgent->export()
        );
    }
}
187