Completed
Push — master ( e39c89...2b63d8 )
by Jan-Petter
02:13
created

Parser::__construct()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 18
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 12
Bugs 5 Features 0
Metric Value
c 12
b 5
f 0
dl 0
loc 18
rs 8.8571
cc 5
eloc 13
nc 4
nop 3
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\EncodingException;
5
use vipnytt\RobotsTxtParser\Parser\Directives\CleanParam;
6
use vipnytt\RobotsTxtParser\Parser\Directives\Host;
7
use vipnytt\RobotsTxtParser\Parser\Directives\Sitemap;
8
use vipnytt\RobotsTxtParser\Parser\Directives\UserAgent;
9
use vipnytt\RobotsTxtParser\Parser\RobotsTxtInterface;
10
use vipnytt\RobotsTxtParser\Parser\Toolbox;
11
12
/**
 * Class Parser
 *
 * Base robots.txt parser: splits the raw content into lines and hands every
 * line to the directive handler responsible for it.
 *
 * The ENCODING, BYTE_LIMIT, MAX_LENGTH_RULE and DIRECTIVE_* constants as well
 * as generateRulePair() are not declared in this class; presumably they come
 * from RobotsTxtInterface and the Toolbox trait respectively.
 *
 * @package vipnytt\RobotsTxtParser
 */
abstract class Parser implements RobotsTxtInterface
{
    use Toolbox;

    /**
     * Directive white list
     *
     * The directives add() dispatches to a dedicated handler. Any other line
     * is passed on to the user-agent handler.
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM,
        self::DIRECTIVE_HOST,
        self::DIRECTIVE_SITEMAP,
        self::DIRECTIVE_USER_AGENT,
    ];

    /**
     * Directive of the most recently added line
     * @var string
     */
    protected $previousDirective;

    /**
     * Current user-agent(s), collected from consecutive user-agent lines
     * @var array
     */
    protected $userAgentValues;

    /**
     * Clean-param class
     * @var CleanParam
     */
    protected $cleanParam;

    /**
     * Host class
     * @var Host
     */
    protected $host;

    /**
     * Sitemap class
     * @var Sitemap
     */
    protected $sitemap;

    /**
     * User-agent class
     * @var UserAgent
     */
    protected $userAgent;

    /**
     * Parser constructor.
     *
     * Sets the mbstring internal encoding, creates the directive handlers,
     * optionally truncates the content to the byte limit and parses it.
     *
     * Note: mb_internal_encoding() changes a process-wide setting, so the
     * encoding stays in effect after the constructor returns.
     *
     * @param string $content - file content
     * @param string $encoding - character encoding
     * @param int|null $byteLimit - maximum of bytes to parse
     * @throws EncodingException
     */
    public function __construct($content, $encoding = self::ENCODING, $byteLimit = self::BYTE_LIMIT)
    {
        $encodingSet = false;
        $reason = null;
        try {
            $encodingSet = mb_internal_encoding($encoding);
        } catch (\Exception $e) {
            // E.g. a warning converted to an exception by a custom error handler.
            $reason = $e->getMessage();
        } catch (\Throwable $e) {
            // PHP 7+: engine errors are not \Exception instances. On PHP 8 an
            // unknown encoding raises \ValueError, which would otherwise
            // escape the documented EncodingException contract.
            $reason = $e->getMessage();
        }
        if (!$encodingSet) {
            // Thrown outside of the try block so that it is not caught and
            // re-wrapped, keeping the message readable.
            $message = 'Unable to set internal character encoding to ' . $encoding;
            if ($reason !== null && $reason !== '') {
                $message .= ' (' . $reason . ')';
            }
            throw new EncodingException($message);
        }

        $this->cleanParam = new CleanParam();
        $this->host = new Host();
        $this->sitemap = new Sitemap();
        $this->userAgent = new UserAgent();

        // Only a positive integer limit truncates; null (or anything else) means no limit.
        if (is_int($byteLimit) && $byteLimit > 0) {
            // mb_strcut() cuts by bytes without splitting a multi-byte character.
            $content = mb_strcut($content, 0, $byteLimit);
        }
        $this->parseTxt($content);
    }

    /**
     * Parse robots.txt
     *
     * Splits the text on CRLF, LF or CR, trims every line, drops lines that
     * are empty after trimming, and feeds the remaining lines to add().
     *
     * @param string $txt
     * @return void
     */
    private function parseTxt($txt)
    {
        // array_filter() without a callback removes falsy entries, i.e. blank lines.
        $lines = array_filter(array_map('trim', mb_split('\r\n|\n|\r', $txt)));
        // Parse each line individually
        foreach ($lines as $line) {
            // Limit rule length
            $line = mb_substr($line, 0, self::MAX_LENGTH_RULE);
            // Remove comments: keep only what precedes the first '#'
            $line = mb_split('#', $line, 2)[0];
            // Parse line
            $this->add($line);
        }
    }

    /**
     * Add line
     *
     * Resolves the line into a directive/value pair and forwards it to the
     * matching handler. Consecutive user-agent lines are accumulated into one
     * group; a user-agent line that follows any other directive starts a new
     * group. Lines that are not a top level directive are forwarded unchanged
     * to the user-agent handler.
     *
     * @param string $line
     * @return bool - result of the handler the line was forwarded to
     */
    public function add($line)
    {
        $previousDirective = $this->previousDirective;
        $pair = $this->generateRulePair($line, self::TOP_LEVEL_DIRECTIVES);
        if ($pair['directive'] === self::DIRECTIVE_USER_AGENT) {
            if ($previousDirective !== self::DIRECTIVE_USER_AGENT) {
                // First user-agent line after another directive: start a new group
                $this->userAgentValues = [];
            }
            $this->userAgentValues[] = $pair['value'];
        }
        $this->previousDirective = $pair['directive'];
        switch ($pair['directive']) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->cleanParam->add($pair['value']);
            case self::DIRECTIVE_HOST:
                return $this->host->add($pair['value']);
            case self::DIRECTIVE_SITEMAP:
                return $this->sitemap->add($pair['value']);
            case self::DIRECTIVE_USER_AGENT:
                return $this->userAgent->set($this->userAgentValues);
        }
        // Not a top level directive: let the user-agent handler deal with the raw line
        return $this->userAgent->add($line);
    }

    /**
     * Export
     *
     * Combines the exports of all directive handlers with the array union
     * operator, so for a key present in more than one export the left-most
     * handler (clean-param, host, sitemap, user-agent) wins.
     *
     * @return array
     */
    public function export()
    {
        return $this->cleanParam->export()
            + $this->host->export()
            + $this->sitemap->export()
            + $this->userAgent->export();
    }
}
157