Completed
Push — master ( 705095...78cb27 )
by Jan-Petter
04:57
created

Parser::add()   C

Complexity

Conditions 7
Paths 15

Size

Total Lines 23
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
c 4
b 0
f 0
dl 0
loc 23
rs 6.7272
cc 7
eloc 18
nc 15
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ParserException;
5
use vipnytt\RobotsTxtParser\Parser\Directives\CleanParam;
6
use vipnytt\RobotsTxtParser\Parser\Directives\Host;
7
use vipnytt\RobotsTxtParser\Parser\Directives\Sitemap;
8
use vipnytt\RobotsTxtParser\Parser\Directives\UserAgent;
9
use vipnytt\RobotsTxtParser\Parser\Toolbox;
10
11
/**
 * Class Parser
 *
 * Base robots.txt parser. Splits the raw content into lines and hands every
 * line to the handler of the top-level directive it belongs to.
 *
 * @package vipnytt\RobotsTxtParser
 */
abstract class Parser implements RobotsTxtInterface
{
    use Toolbox;

    /**
     * Directive white list
     *
     * Directives that add() dispatches to a dedicated handler. Every other
     * line is forwarded unchanged to the user-agent handler.
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
        self::DIRECTIVE_SITEMAP,
        self::DIRECTIVE_USER_AGENT,
    ];

    /**
     * RAW robots.txt content (already cut to the byte limit, if one was given)
     * @var string
     */
    protected $raw;

    /**
     * Directive found on the previously added line (null when none was recognised)
     * @var string|null
     */
    protected $previousDirective;

    /**
     * Current user-agent(s), collected from consecutive user-agent lines
     * @var array
     */
    protected $userAgentValues;

    /**
     * Clean-param class
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Host class
     * @var Host
     */
    protected $host;

    /**
     * Sitemap class
     * @var Sitemap
     */
    protected $sitemap;

    /**
     * User-agent class
     * @var UserAgent
     */
    protected $userAgent;

    /**
     * Parser constructor.
     *
     * Validates the encoding, sets it as the mbstring internal encoding
     * (a process-wide setting), creates the directive handlers and parses
     * the content immediately.
     *
     * @param string $content - file content
     * @param string $encoding - character encoding
     * @param int|null $byteLimit - maximum of bytes to parse; content is only cut when this is an integer
     * @throws ParserException when the encoding is unknown or cannot be set
     */
    public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT)
    {
        // Strict comparison: a non-string value (e.g. `true`) must not loosely match an encoding name
        if (
            !in_array($encoding, mb_list_encodings(), true) ||
            !mb_internal_encoding($encoding)
        ) {
            throw new ParserException('Unable to set internal character encoding to `' . $encoding . '`');
        }
        $this->cleanParam = new CleanParam();
        $this->host = new Host();
        $this->sitemap = new Sitemap();
        $this->userAgent = new UserAgent();

        // mb_strcut() cuts on a character boundary, so no multi-byte character is split in half
        $this->raw = is_int($byteLimit) ? mb_strcut($content, 0, $byteLimit, $encoding) : $content;
        $this->parseTxt();
    }

    /**
     * Parse robots.txt
     *
     * Splits the raw content on any line ending, trims each line, skips empty
     * ones, limits the rule length, strips comments and feeds the result to add().
     *
     * @return void
     */
    private function parseTxt()
    {
        // NOTE(review): mb_split() works with the mb_regex_encoding() setting, which the
        // constructor does not change - confirm this is intended.
        $rawLines = mb_split('\r\n|\n|\r', $this->raw);
        if (!is_array($rawLines)) {
            // mb_split() returns false on failure; fall back to a plain byte-wise split
            $rawLines = explode("\n", str_replace(["\r\n", "\r"], "\n", $this->raw));
        }
        // array_filter() without a callback drops empty lines (and a line that is exactly "0")
        $lines = array_filter(array_map('trim', $rawLines));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments (everything from the first `#`)
            $parts = mb_split('#', $line, 2);
            $line = is_array($parts) ? $parts[0] : explode('#', $line, 2)[0];
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add line
     *
     * Dispatches a single line to the handler of its top-level directive.
     * Consecutive user-agent lines are collected into one group; the group is
     * reset as soon as a user-agent line follows any other kind of line.
     * Lines without a recognised top-level directive go to the user-agent handler.
     *
     * @param string $line
     * @return bool result reported by the handler that received the line
     */
    public function add($line)
    {
        $previousDirective = $this->previousDirective;
        $pair = $this->generateRulePair($line, self::TOP_LEVEL_DIRECTIVES);
        // generateRulePair() lives outside this class; read its result defensively so that
        // a non-array result (no recognised directive) does not raise notices/warnings
        $directive = isset($pair['directive']) ? $pair['directive'] : null;
        $value = isset($pair['value']) ? $pair['value'] : null;

        if ($directive === self::DIRECTIVE_USER_AGENT) {
            if ($previousDirective !== self::DIRECTIVE_USER_AGENT) {
                // A new group starts: forget the user-agents of the previous group
                $this->userAgentValues = [];
            }
            $this->userAgentValues[] = $value;
        }
        $this->previousDirective = $directive;
        switch ($directive) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($value);
            case self::DIRECTIVE_HOST:
                return $this->host->add($value);
            case self::DIRECTIVE_SITEMAP:
                return $this->sitemap->add($value);
            case self::DIRECTIVE_USER_AGENT:
                return $this->userAgent->set($this->userAgentValues);
        }
        // Not a top-level directive: hand the untouched line to the user-agent handler
        return $this->userAgent->add($line);
    }

    /**
     * Export
     *
     * Combines the exports of all directive handlers with the array union
     * operator; on a key collision the value of the earlier operand wins.
     *
     * @return array
     */
    public function export()
    {
        return $this->cleanParam->export()
        + $this->host->export()
        + $this->sitemap->export()
        + $this->userAgent->export();
    }
}
159