Completed
Push — master ( 2c8763...82b108 )
by Jan-Petter
02:13
created

XRobotsTagParser::parse()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 14
Bugs 4 Features 0
Metric Value
c 14
b 4
f 0
dl 0
loc 13
rs 9.2
cc 4
eloc 8
nc 3
nop 1
1
<?php
2
namespace vipnytt;
3
4
/**
5
 * X-Robots-Tag HTTP header parser class
6
 *
7
 * @author VIP nytt ([email protected])
8
 * @author Jan-Petter Gundersen ([email protected])
9
 *
10
 * Project:
11
 * @link https://github.com/VIPnytt/X-Robots-Tag-parser
12
 * @license https://opensource.org/licenses/MIT MIT license
13
 *
14
 * Specification:
15
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header
16
 */
17
18
use vipnytt\XRobotsTagParser\Exceptions\XRobotsTagParserException;
19
use vipnytt\XRobotsTagParser\Rebuild;
20
use vipnytt\XRobotsTagParser\UserAgentParser;
21
22
class XRobotsTagParser
{
    /**
     * Name of the HTTP header that carries the rules (compared case-insensitively)
     */
    const HEADER_RULE_IDENTIFIER = 'X-Robots-Tag';

    /**
     * Directive names recognised by the parser (see directiveClasses())
     */
    const DIRECTIVE_ALL = 'all';
    const DIRECTIVE_NONE = 'none';
    const DIRECTIVE_NO_ARCHIVE = 'noarchive';
    const DIRECTIVE_NO_FOLLOW = 'nofollow';
    const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex';
    const DIRECTIVE_NO_INDEX = 'noindex';
    const DIRECTIVE_NO_ODP = 'noodp';
    const DIRECTIVE_NO_SNIPPET = 'nosnippet';
    const DIRECTIVE_NO_TRANSLATE = 'notranslate';
    const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after';

    /**
     * User-Agent handed to the constructor
     * @var string
     */
    protected $userAgent = '';

    /**
     * Rule-set key selected by matchUserAgent(); empty string when nothing matched
     * @var string
     */
    protected $userAgentMatch = '';

    /**
     * Value of the header currently being processed (already lower-cased)
     * @var string
     */
    protected $currentRule = '';

    /**
     * User-Agent prefix of the current rule; empty string means "no specific User-Agent"
     * @var string
     */
    protected $currentUserAgent = '';

    /**
     * Collected rules, keyed by User-Agent and then by directive
     * @var array
     */
    protected $rules = [];

    /**
     * Constructor
     *
     * @param string $userAgent User-Agent the rules should be resolved for
     * @param array|null $headers Optional header lines, parsed immediately when given
     */
    public function __construct($userAgent = '', $headers = null)
    {
        $this->userAgent = $userAgent;
        if ($headers !== null) {
            $this->parse($headers);
        }
    }

    /**
     * Parse HTTP headers
     *
     * Every "X-Robots-Tag: ..." line is split into directives and stored;
     * all other entries are ignored. Afterwards the best matching
     * User-Agent is re-evaluated.
     *
     * @param array $headers Header lines in "Name: value" form
     * @return void
     */
    public function parse(array $headers)
    {
        // Loop-invariant, so compute it once
        $identifier = mb_strtolower(self::HEADER_RULE_IDENTIFIER);
        foreach ($headers as $header) {
            if (!is_string($header)) {
                // Nested/array values (e.g. get_headers() with associative output) cannot be a rule line
                continue;
            }
            $parts = array_map('trim', explode(':', mb_strtolower($header), 2));
            if (count($parts) < 2 || $parts[0] !== $identifier) {
                // Header is not a rule
                continue;
            }
            $this->currentRule = $parts[1];
            $this->detectDirectives();
        }
        $this->matchUserAgent();
    }

    /**
     * Detect directives in rule
     *
     * The rule is split on commas. If the first segment has the form
     * "name: rest" and "name" is not a known directive, "name" is taken
     * as the User-Agent the rule applies to.
     *
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function detectDirectives()
    {
        $classes = $this->directiveClasses();
        $directives = array_map('trim', explode(',', $this->currentRule));
        $pair = array_map('trim', explode(':', $directives[0], 2));
        if (count($pair) === 2 && !isset($classes[$pair[0]])) {
            $this->currentUserAgent = $pair[0];
            $directives[0] = $pair[1];
        }
        foreach ($directives as $rule) {
            // Only the part before an optional ":" names the directive
            $directive = trim(explode(':', $rule, 2)[0]);
            if (isset($classes[$directive])) {
                $this->addRule($directive);
            }
        }
        $this->cleanup();
    }

    /**
     * Array of directives and their class names
     *
     * @return array Directive name => short class name inside the "directives" namespace
     */
    protected function directiveClasses()
    {
        return [
            self::DIRECTIVE_ALL => 'All',
            self::DIRECTIVE_NO_ARCHIVE => 'NoArchive',
            self::DIRECTIVE_NO_FOLLOW => 'NoFollow',
            self::DIRECTIVE_NO_IMAGE_INDEX => 'NoImageIndex',
            self::DIRECTIVE_NO_INDEX => 'NoIndex',
            self::DIRECTIVE_NONE => 'None',
            self::DIRECTIVE_NO_ODP => 'NoODP',
            self::DIRECTIVE_NO_SNIPPET => 'NoSnippet',
            self::DIRECTIVE_NO_TRANSLATE => 'NoTranslate',
            self::DIRECTIVE_UNAVAILABLE_AFTER => 'UnavailableAfter',
        ];
    }

    /**
     * Add rule
     *
     * Stores the directive/value pair reported by the directive object
     * under the User-Agent of the rule currently being processed.
     *
     * @param string $directive
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function addRule($directive)
    {
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        $object = $this->buildDirective($directive, $this->currentRule);
        $this->rules[$this->currentUserAgent] = array_merge(
            $this->rules[$this->currentUserAgent],
            [$object->getDirective() => $object->getValue()]
        );
    }

    /**
     * Instantiate and validate the class that handles a directive
     *
     * @param string $directive Key of directiveClasses()
     * @param string $argument Value handed to the directive class constructor
     * @return XRobotsTagParser\directives\DirectiveInterface
     * @throws XRobotsTagParserException
     */
    private function buildDirective($directive, $argument)
    {
        // Class names held in strings are always fully qualified, so no leading backslash is needed
        $class = __CLASS__ . '\\directives\\' . $this->directiveClasses()[$directive];
        if (!class_exists($class)) {
            throw new XRobotsTagParserException('Unsupported directive class');
        }
        $object = new $class($argument);
        if (!$object instanceof XRobotsTagParser\directives\DirectiveInterface) {
            throw new XRobotsTagParserException('Unsupported directive class');
        }
        return $object;
    }

    /**
     * Cleanup before next rule is read
     *
     * @return void
     */
    protected function cleanup()
    {
        $this->currentRule = '';
        $this->currentUserAgent = '';
    }

    /**
     * Find the most rule-matching User-Agent
     *
     * @return string The matching rule-set key, or an empty string when the matcher did not return a string
     */
    protected function matchUserAgent()
    {
        $userAgentParser = new UserAgentParser($this->userAgent);
        $match = $userAgentParser->match(array_keys($this->rules), '');
        $this->userAgentMatch = is_string($match) ? $match : '';
        return $this->userAgentMatch;
    }

    /**
     * Return all applicable rules
     *
     * Rules without a User-Agent are applied first; rules of the matching
     * User-Agent are merged on top and win for identical directives.
     *
     * @param bool $raw When true, skip the Rebuild post-processing step
     * @return array
     */
    public function getRules($raw = false)
    {
        $rules = [];
        // Default UserAgent
        if (isset($this->rules[''])) {
            $rules = array_merge($rules, $this->rules['']);
        }
        // Matching UserAgent
        if (isset($this->rules[$this->userAgentMatch])) {
            $rules = array_merge($rules, $this->rules[$this->userAgentMatch]);
        }
        if ($raw) {
            return $rules;
        }
        $rebuild = new Rebuild($rules);
        return $rebuild->getResult();
    }

    /**
     * Export all rules for all UserAgents
     *
     * @return array User-Agent => [directive => value]
     */
    public function export()
    {
        return $this->rules;
    }

    /**
     * Get the meaning of a directive
     *
     * @param string $directive One of the DIRECTIVE_* values
     * @return string
     * @throws XRobotsTagParserException When the directive is unknown or its class is unusable
     */
    public function getDirectiveMeaning($directive)
    {
        $classes = $this->directiveClasses();
        // Exact key lookup: a loose in_array() would accept values such as true
        if (!is_string($directive) || !isset($classes[$directive])) {
            throw new XRobotsTagParserException('Unknown directive');
        }
        // NOTE(review): the constructor receives the short class name here, whereas
        // addRule() passes the rule text - confirm this is intended.
        return $this->buildDirective($directive, $classes[$directive])->getMeaning();
    }
}
221