Completed
Push — master ( 911395...24310f )
by Jan-Petter
02:47
created

Parser::__construct()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 14
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 9
Bugs 2 Features 0
Metric Value
c 9
b 2
f 0
dl 0
loc 14
rs 9.2
cc 4
eloc 10
nc 3
nop 3
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\EncodingException;
5
use vipnytt\RobotsTxtParser\Parser\Directives\CleanParam;
6
use vipnytt\RobotsTxtParser\Parser\Directives\Host;
7
use vipnytt\RobotsTxtParser\Parser\Directives\Sitemap;
8
use vipnytt\RobotsTxtParser\Parser\Directives\UserAgent;
9
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
10
use vipnytt\RobotsTxtParser\Parser\Toolbox;
11
12
/**
 * Class Parser
 *
 * Base robots.txt parser. The raw file content is split into lines, and every
 * line is routed to the handler of the top-level directive it starts with
 * (Clean-param, Host, Sitemap or User-agent). Any other line is handed to the
 * User-agent handler.
 *
 * NOTE(review): the ENCODING, BYTE_LIMIT, MAX_LENGTH_RULE and DIRECTIVE_*
 * constants are not declared in this class; presumably they come from
 * RobotsTxtInterface - confirm.
 *
 * @package vipnytt\RobotsTxtParser
 */
abstract class Parser implements RobotsTxtInterface
{
    use Toolbox;

    /**
     * Directive white list
     *
     * The directives which are dispatched directly by add().
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
        self::DIRECTIVE_SITEMAP,
        self::DIRECTIVE_USER_AGENT,
    ];

    /**
     * Previous directive
     *
     * Directive of the line most recently passed to add(), used to detect
     * consecutive User-agent lines.
     * @var string
     */
    protected $previousDirective;

    /**
     * Current user-agent(s)
     * @var array
     */
    protected $userAgentValues;

    /**
     * Clean-param class
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Host class
     * @var Host
     */
    protected $host;

    /**
     * Sitemap class
     * @var Sitemap
     */
    protected $sitemap;

    /**
     * User-agent class
     * @var UserAgent
     */
    protected $userAgent;

    /**
     * Parser constructor.
     *
     * Sets the mbstring internal encoding, creates the directive handlers,
     * optionally truncates the content to the byte limit and parses it.
     *
     * @param string $content - file content
     * @param string $encoding - character encoding
     * @param int|null $byteLimit - maximum of bytes to parse
     * @throws EncodingException
     */
    public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT)
    {
        try {
            $encodingIsSet = mb_internal_encoding($encoding);
        } catch (\ValueError $invalidEncoding) {
            // PHP 8+ throws instead of returning false for an unknown encoding;
            // translate it so callers still receive the documented exception.
            $encodingIsSet = false;
        }
        if (!$encodingIsSet) {
            throw new EncodingException('Unable to set internal character encoding to `' . $encoding . '`');
        }
        $this->cleanParam = new CleanParam();
        $this->host = new Host();
        $this->sitemap = new Sitemap();
        $this->userAgent = new UserAgent();
        // Only a positive integer limit truncates the content
        if (is_int($byteLimit) && $byteLimit > 0) {
            $content = mb_strcut($content, 0, $byteLimit);
        }
        $this->parseTxt($content);
    }

    /**
     * Parse robots.txt
     *
     * Splits the content on line breaks, trims every line, drops the empty
     * ones, limits the line length, strips comments and passes the remainder
     * to add().
     *
     * @param string $txt
     * @return void
     */
    private function parseTxt($txt)
    {
        $rows = mb_split('\r\n|\n|\r', $txt);
        if ($rows === false) {
            // mb_split() reports failure with false; fall back to a byte-wise
            // split instead of discarding the whole file.
            // NOTE(review): byte-wise splitting assumes an ASCII-compatible encoding.
            $rows = preg_split('/\r\n|\n|\r/', $txt);
        }
        if (!is_array($rows)) {
            return;
        }
        $lines = array_filter(array_map('trim', $rows));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments
            $parts = mb_split('#', $line, 2);
            if ($parts === false) {
                // Same fallback as above, for the same failure mode
                $parts = explode('#', $line, 2);
            }
            // Parse line
            $this->add($parts[0]);
        }
    }

    /**
     * Add line
     *
     * Consecutive User-agent lines are collected into one group; a User-agent
     * line following any other directive starts a new group. Lines without a
     * top-level directive are passed on to the User-agent handler.
     *
     * @param string $line
     * @return bool
     */
    public function add($line)
    {
        $pair = $this->generateRulePair($line, self::TOP_LEVEL_DIRECTIVES);
        // NOTE(review): generateRulePair() is defined in the Toolbox trait, which is
        // not visible here; isset() keeps this safe should it return a non-array
        // for lines without a supported directive - confirm.
        $directive = isset($pair['directive']) ? $pair['directive'] : null;
        if ($directive === self::DIRECTIVE_USER_AGENT) {
            if ($this->previousDirective !== self::DIRECTIVE_USER_AGENT) {
                // First User-agent line after another directive: start a new group
                $this->userAgentValues = [];
            }
            $this->userAgentValues[] = $pair['value'];
        }
        $this->previousDirective = $directive;
        switch ($directive) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($pair['value']);
            case self::DIRECTIVE_HOST:
                return $this->host->add($pair['value']);
            case self::DIRECTIVE_SITEMAP:
                return $this->sitemap->add($pair['value']);
            case self::DIRECTIVE_USER_AGENT:
                return $this->userAgent->set($this->userAgentValues);
        }
        // Not a top-level directive: let the User-agent handler deal with the raw line
        return $this->userAgent->add($line);
    }

    /**
     * Export
     *
     * Combines the exports of all directive handlers using the array union
     * operator, so on duplicate keys the value of the earlier handler is kept.
     *
     * @return array
     */
    public function export()
    {
        return $this->cleanParam->export()
        + $this->host->export()
        + $this->sitemap->export()
        + $this->userAgent->export();
    }
}
153