Completed
Push — master ( eed8e0...911395 )
by Jan-Petter
02:02
created

Parser::__construct()   B

Complexity

Conditions 5
Paths 3

Size

Total Lines 17
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 8
Bugs 1 Features 0
Metric Value
c 8
b 1
f 0
dl 0
loc 17
rs 8.8571
cc 5
eloc 12
nc 3
nop 3
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\EncodingException;
5
use vipnytt\RobotsTxtParser\Parser\Directives\CleanParam;
6
use vipnytt\RobotsTxtParser\Parser\Directives\Host;
7
use vipnytt\RobotsTxtParser\Parser\Directives\Sitemap;
8
use vipnytt\RobotsTxtParser\Parser\Directives\UserAgent;
9
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
10
use vipnytt\RobotsTxtParser\Parser\Toolbox;
11
12
/**
 * Class Parser
 *
 * Base robots.txt parser. Splits the raw file content into lines and routes
 * each line to the handler object of the top-level directive it belongs to
 * (Clean-param, Host, Sitemap or User-agent).
 *
 * @package vipnytt\RobotsTxtParser
 */
abstract class Parser implements RobotsTxtInterface
{
    use Toolbox;

    /**
     * Directive white list
     *
     * The directives recognised at the top level of the file. The list is
     * handed to generateRulePair() when a line is classified in add().
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
        self::DIRECTIVE_SITEMAP,
        self::DIRECTIVE_USER_AGENT,
    ];

    /**
     * Previous directive
     *
     * Directive of the most recently added line; used to tell whether a
     * User-agent line continues the current group or starts a new one.
     *
     * @var string
     */
    protected $previousDirective;

    /**
     * Current user-agent(s)
     *
     * Values collected from the current run of consecutive User-agent lines.
     *
     * @var array
     */
    protected $userAgentValues;

    /**
     * Clean-param class
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Host class
     * @var Host
     */
    protected $host;

    /**
     * Sitemap class
     * @var Sitemap
     */
    protected $sitemap;

    /**
     * User-agent class
     * @var UserAgent
     */
    protected $userAgent;

    /**
     * Parser constructor.
     *
     * Validates and activates the character encoding, creates the directive
     * handlers, optionally truncates the content to the byte limit and then
     * parses it.
     *
     * Note: mb_internal_encoding() changes the mbstring internal encoding for
     * the whole running script, not just for this object.
     *
     * @param string $content - file content
     * @param string $encoding - character encoding
     * @param int|null $byteLimit - maximum of bytes to parse; ignored unless it is a positive integer
     * @throws EncodingException if the encoding is not listed by mbstring or cannot be activated
     */
    public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT)
    {
        // Strict comparison: without it a non-string value (e.g. 0 or true)
        // can loosely match an encoding name and slip through the whitelist.
        $isListed = in_array($encoding, mb_list_encodings(), true);
        if (!$isListed || !mb_internal_encoding($encoding)) {
            throw new EncodingException('Unable to set internal character encoding to `' . $encoding . '`');
        }
        $this->cleanParam = new CleanParam();
        $this->host = new Host();
        $this->sitemap = new Sitemap();
        $this->userAgent = new UserAgent();
        if (is_int($byteLimit) && $byteLimit > 0) {
            // mb_strcut() cuts by bytes but never in the middle of a multi-byte character.
            $content = mb_strcut($content, 0, $byteLimit);
        }
        $this->parseTxt($content);
    }

    /**
     * Parse robots.txt
     *
     * Splits the text on any line ending (CRLF, LF or CR), trims every line
     * and feeds the non-empty ones to add() in their original order.
     *
     * @param string $txt
     * @return void
     */
    private function parseTxt($txt)
    {
        // array_filter() without a callback discards lines that are empty after trimming.
        $lines = array_filter(array_map('trim', mb_split('\r\n|\n|\r', $txt)));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments: keep only what precedes the first `#`
            $line = mb_split('#', $line, 2)[0];
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add line
     *
     * Classifies the line against the top-level directives and forwards it to
     * the matching handler. Consecutive User-agent lines are accumulated into
     * one group; a User-agent line that follows any other directive starts a
     * new group. A line that is not a top-level directive is passed unchanged
     * to the User-agent handler.
     *
     * @param string $line
     * @return bool result reported by the handler that received the line
     */
    public function add($line)
    {
        $pair = $this->generateRulePair($line, self::TOP_LEVEL_DIRECTIVES);
        $directive = $pair['directive'];
        if ($directive === self::DIRECTIVE_USER_AGENT) {
            // Must be checked before previousDirective is overwritten below.
            if ($this->previousDirective !== self::DIRECTIVE_USER_AGENT) {
                $this->userAgentValues = [];
            }
            $this->userAgentValues[] = $pair['value'];
        }
        $this->previousDirective = $directive;
        switch ($directive) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($pair['value']);
            case self::DIRECTIVE_HOST:
                return $this->host->add($pair['value']);
            case self::DIRECTIVE_SITEMAP:
                return $this->sitemap->add($pair['value']);
            case self::DIRECTIVE_USER_AGENT:
                return $this->userAgent->set($this->userAgentValues);
        }
        // Not a top-level directive: hand the whole line to the User-agent handler.
        return $this->userAgent->add($line);
    }

    /**
     * Export
     *
     * Combines the exports of all four directive handlers with the array
     * union operator; if two handlers return the same key, the value of the
     * handler listed first is kept.
     *
     * @return array
     */
    public function export()
    {
        return $this->cleanParam->export()
        + $this->host->export()
        + $this->sitemap->export()
        + $this->userAgent->export();
    }
}
156