Completed
Push — master ( 2c514d...b5f31e )
by Jan-Petter
05:00
created

XRobotsTagParser::parse()   B

Complexity

Conditions 5
Paths 6

Size

Total Lines 15
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 13
Bugs 4 Features 0
Metric Value
c 13
b 4
f 0
dl 0
loc 15
rs 8.8571
cc 5
eloc 10
nc 6
nop 1
1
<?php
2
namespace vipnytt;
3
4
/**
5
 * X-Robots-Tag HTTP header parser class
6
 *
7
 * @author VIP nytt ([email protected])
8
 * @author Jan-Petter Gundersen ([email protected])
9
 *
10
 * Project:
11
 * @link https://github.com/VIPnytt/X-Robots-Tag-parser
12
 * @license https://opensource.org/licenses/MIT MIT license
13
 *
14
 * Specification:
15
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header
16
 */
17
18
use vipnytt\XRobotsTagParser\Exceptions\XRobotsTagParserException;
19
use vipnytt\XRobotsTagParser\Rebuild;
20
use vipnytt\XRobotsTagParser\UserAgentParser;
21
22
class XRobotsTagParser
{
    /**
     * Name of the HTTP header that carries the rules
     */
    const HEADER_RULE_IDENTIFIER = 'X-Robots-Tag';

    /**
     * Supported directive names (header values are lower-cased before comparison)
     */
    const DIRECTIVE_ALL = 'all';
    const DIRECTIVE_NONE = 'none';
    const DIRECTIVE_NO_ARCHIVE = 'noarchive';
    const DIRECTIVE_NO_FOLLOW = 'nofollow';
    const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex';
    const DIRECTIVE_NO_INDEX = 'noindex';
    const DIRECTIVE_NO_ODP = 'noodp';
    const DIRECTIVE_NO_SNIPPET = 'nosnippet';
    const DIRECTIVE_NO_TRANSLATE = 'notranslate';
    const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after';

    /**
     * User agent passed to the constructor
     *
     * @var string
     */
    protected $userAgent = '';

    /**
     * String returned by UserAgentParser::match() for the parsed groups, '' otherwise
     *
     * @var string
     */
    protected $userAgentMatch = '';

    /**
     * Lower-cased value of the header currently being processed
     *
     * @var string
     */
    protected $currentRule = '';

    /**
     * User agent prefix of the rule currently being processed ('' = no prefix)
     *
     * @var string
     */
    protected $currentUserAgent = '';

    /**
     * Collected rules: user agent => [directive => value]
     *
     * @var array
     */
    protected $rules = [];

    /**
     * Constructor
     *
     * @param string $userAgent
     * @param array|null $headers Header lines to parse immediately; null skips parsing
     */
    public function __construct($userAgent = '', $headers = null)
    {
        $this->userAgent = $userAgent;
        if ($headers !== null) {
            $this->parse($headers);
        }
    }

    /**
     * Parse HTTP headers
     *
     * Every "X-Robots-Tag: ..." line (name compared case-insensitively) is
     * lower-cased and scanned for directives; all other entries are ignored.
     * Afterwards the user agent group matching $this->userAgent is selected.
     *
     * @param array $headers Raw header lines ("Name: value")
     * @return void
     */
    public function parse(array $headers)
    {
        // Loop invariant: the lower-cased header name we are looking for
        $identifier = mb_strtolower(self::HEADER_RULE_IDENTIFIER);
        foreach ($headers as $header) {
            if (is_array($header)) {
                // Nested arrays are not "Name: value" lines and cannot be lower-cased
                continue;
            }
            $parts = array_map('trim', explode(':', mb_strtolower($header), 2));
            if (count($parts) < 2 || $parts[0] !== $identifier) {
                // Header is not a rule
                continue;
            }
            $this->currentRule = $parts[1];
            $this->detectDirectives();
        }
        $userAgentParser = new UserAgentParser($this->userAgent);
        $match = $userAgentParser->match(array_keys($this->rules), '');
        $this->userAgentMatch = is_string($match) ? $match : '';
    }

    /**
     * Detect directives in rule
     *
     * Reads $this->currentRule, splits it on commas, and registers every known
     * directive through addRule(). A leading "name:" that is not itself a
     * directive is treated as the user agent the rule applies to.
     *
     * @return void
     */
    protected function detectDirectives()
    {
        // Start from a clean state: if a previous rule aborted with an exception,
        // cleanup() never ran and a stale user agent would otherwise leak into this rule.
        $this->currentUserAgent = '';
        $known = $this->directiveClasses();
        $directives = array_map('trim', explode(',', $this->currentRule));
        $pair = array_map('trim', explode(':', $directives[0], 2));
        if (count($pair) === 2 && !array_key_exists($pair[0], $known)) {
            // "<user agent>: <directive>" - strip the prefix from the first directive
            $this->currentUserAgent = $pair[0];
            $directives[0] = $pair[1];
        }
        foreach ($directives as $rule) {
            // Only the part before an optional ":" names the directive
            $directive = trim(explode(':', $rule, 2)[0]);
            if (array_key_exists($directive, $known)) {
                $this->addRule($directive);
            }
        }
        $this->cleanup();
    }

    /**
     * Array of directives and their class names
     *
     * @return array Directive name => class short name below the "directives" namespace
     */
    protected function directiveClasses()
    {
        return [
            self::DIRECTIVE_ALL => 'All',
            self::DIRECTIVE_NO_ARCHIVE => 'NoArchive',
            self::DIRECTIVE_NO_FOLLOW => 'NoFollow',
            self::DIRECTIVE_NO_IMAGE_INDEX => 'NoImageIndex',
            self::DIRECTIVE_NO_INDEX => 'NoIndex',
            self::DIRECTIVE_NONE => 'None',
            self::DIRECTIVE_NO_ODP => 'NoODP',
            self::DIRECTIVE_NO_SNIPPET => 'NoSnippet',
            self::DIRECTIVE_NO_TRANSLATE => 'NoTranslate',
            self::DIRECTIVE_UNAVAILABLE_AFTER => 'UnavailableAfter',
        ];
    }

    /**
     * Instantiate the handler class of a directive
     *
     * @param string $directive Directive name, a key of directiveClasses()
     * @param string $argument Value handed to the handler's constructor
     * @return XRobotsTagParser\directives\DirectiveInterface
     * @throws XRobotsTagParserException
     */
    protected function createDirective($directive, $argument)
    {
        $classes = $this->directiveClasses();
        if (!is_string($directive) || !array_key_exists($directive, $classes)) {
            throw new XRobotsTagParserException('Unknown directive');
        }
        // Class names held in strings are always resolved as fully qualified
        $class = __CLASS__ . "\\directives\\" . $classes[$directive];
        if (!class_exists($class)) {
            // Raise the documented exception instead of a fatal "class not found" error
            throw new XRobotsTagParserException('Unsupported directive class');
        }
        $object = new $class($argument);
        if (!$object instanceof XRobotsTagParser\directives\DirectiveInterface) {
            throw new XRobotsTagParserException('Unsupported directive class');
        }
        return $object;
    }

    /**
     * Add rule
     *
     * Builds the directive handler from the current rule and stores its
     * directive/value pair in the group of the current user agent.
     *
     * @param string $directive
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function addRule($directive)
    {
        // Instantiate first, so a failing directive does not leave an empty group behind
        $object = $this->createDirective($directive, $this->currentRule);
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        $this->rules[$this->currentUserAgent] = array_merge(
            $this->rules[$this->currentUserAgent],
            [$object->getDirective() => $object->getValue()]
        );
    }

    /**
     * Cleanup before next rule is read
     *
     * @return void
     */
    protected function cleanup()
    {
        $this->currentRule = '';
        $this->currentUserAgent = '';
    }

    /**
     * Return all applicable rules
     *
     * Rules without a user agent prefix come first; rules of the matching
     * user agent group are merged on top and win on duplicate directives.
     *
     * @param bool $raw When false, the merged rules are passed through Rebuild
     * @return array
     */
    public function getRules($raw = false)
    {
        // Default UserAgent
        $rules = isset($this->rules['']) ? array_merge([], $this->rules['']) : [];
        // Matching UserAgent ('' is the default group, which is already included)
        if ($this->userAgentMatch !== '' && isset($this->rules[$this->userAgentMatch])) {
            $rules = array_merge($rules, $this->rules[$this->userAgentMatch]);
        }
        if (!$raw) {
            $rebuild = new Rebuild($rules);
            $rules = $rebuild->getResult();
        }
        // Result
        return $rules;
    }

    /**
     * Export all rules for all UserAgents
     *
     * @return array User agent => [directive => value]
     */
    public function export()
    {
        return $this->rules;
    }

    /**
     * Get the meaning of a directive
     *
     * @param string $directive One of the DIRECTIVE_* values
     * @return string
     * @throws XRobotsTagParserException
     */
    public function getDirectiveMeaning($directive)
    {
        $classes = $this->directiveClasses();
        // Strict lookup: non-string input such as true must not pass as a known directive
        if (!is_string($directive) || !array_key_exists($directive, $classes)) {
            throw new XRobotsTagParserException('Unknown directive');
        }
        // NOTE(review): the handler receives the class short name (e.g. "NoIndex") rather
        // than a rule string; kept as-is - confirm against the directive constructors.
        return $this->createDirective($directive, $classes[$directive])->getMeaning();
    }
}
210