Passed
Branch master (b142f3)
by Jan-Petter
02:30
created

XRobotsTagParser::addRule()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 11
Bugs 2 Features 0
Metric Value
c 11
b 2
f 0
dl 0
loc 8
rs 9.4285
cc 2
eloc 5
nc 2
nop 0
1
<?php
2
/**
3
 * X-Robots-Tag HTTP header parser class
4
 *
5
 * @author VIP nytt ([email protected])
6
 * @author Jan-Petter Gundersen ([email protected])
7
 *
8
 * Project:
9
 * @link https://github.com/VIPnytt/X-Robots-Tag-parser
10
 * @license https://opensource.org/licenses/MIT MIT license
11
 *
12
 * Specification:
13
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header
14
 */
15
16
namespace vipnytt;
17
18
use vipnytt\XRobotsTagParser\directive;
19
use vipnytt\XRobotsTagParser\URLParser;
20
use vipnytt\XRobotsTagParser\UserAgentParser;
21
22
class XRobotsTagParser
{
    /** Lower-cased name of the HTTP header that carries the rules. */
    const HEADER_RULE_IDENTIFIER = 'x-robots-tag';

    /** Key under which rules without a user-agent prefix are stored. */
    const USERAGENT_DEFAULT = '';

    const DIRECTIVE_ALL = 'all';
    const DIRECTIVE_NONE = 'none';
    const DIRECTIVE_NO_ARCHIVE = 'noarchive';
    const DIRECTIVE_NO_FOLLOW = 'nofollow';
    const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex';
    const DIRECTIVE_NO_INDEX = 'noindex';
    const DIRECTIVE_NO_ODP = 'noodp';
    const DIRECTIVE_NO_SNIPPET = 'nosnippet';
    const DIRECTIVE_NO_TRANSLATE = 'notranslate';
    const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after';

    /** @var string URL as returned by URLParser::encode() */
    private $url = '';

    /** @var string User-agent key selected by UserAgentParser::match() */
    private $userAgent = self::USERAGENT_DEFAULT;

    /** @var array Raw HTTP header lines to scan */
    private $headers = [];

    /** @var string Value of the X-Robots-Tag header currently being processed */
    private $currentRule = '';

    /** @var string User-agent the current header applies to */
    private $currentUserAgent = self::USERAGENT_DEFAULT;

    /** @var string Directive currently being added */
    private $currentDirective = '';

    /** @var string Value of the directive currently being added ('' when none) */
    private $currentValue = '';

    /** @var array Parsed rules, keyed by user-agent */
    private $rules = [];

    /**
     * Constructor
     *
     * Encodes the URL, loads the headers (the supplied ones, or those fetched
     * from the URL), parses them into rules and finally selects the
     * user-agent key to use in getRules().
     *
     * A warning (E_USER_WARNING) is raised when URLParser reports the URL as
     * invalid; processing continues with the encoded URL regardless.
     *
     * @param string $url
     * @param string $userAgent
     * @param array|null $headers - header lines to use instead of requesting them
     */
    public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, $headers = null)
    {
        // Parse URL
        $urlParser = new URLParser(trim($url));
        if (!$urlParser->isValid()) {
            trigger_error('Invalid URL', E_USER_WARNING);
        }
        $this->url = $urlParser->encode();
        // Get headers
        $this->useHeaders($headers);
        // Parse rules
        $this->parse();
        // Set User-Agent; done after parsing because the match is made
        // against the user-agents actually present in the rules
        $parser = new UserAgentParser($userAgent);
        $this->userAgent = $parser->match(array_keys($this->rules), self::USERAGENT_DEFAULT);
    }

    /**
     * Request HTTP headers
     *
     * Uses $customHeaders when it is a non-empty array. Anything else
     * (except false) causes a single get_headers() request against the URL.
     * The request is made at most once, so an empty result can no longer
     * lead to endless recursion.
     *
     * @param array|null|false $customHeaders - use these headers
     * @return bool false when no headers could be obtained
     */
    private function useHeaders($customHeaders = null)
    {
        if ($customHeaders !== false && (!is_array($customHeaders) || empty($customHeaders))) {
            $customHeaders = get_headers($this->url);
        }
        if (!is_array($customHeaders)) {
            // get_headers() failed, or the caller passed false explicitly.
            // E_USER_ERROR halts the script unless an error handler intervenes.
            trigger_error('Unable to fetch HTTP headers', E_USER_ERROR);
            return false;
        }
        $this->headers = $customHeaders;
        return true;
    }

    /**
     * Parse HTTP headers
     *
     * Every "x-robots-tag: ..." line is handed to detectDirectives(); the
     * whole line is lower-cased first, so names and values are matched
     * case-insensitively. Other lines, and entries that are not strings,
     * are skipped.
     *
     * @return void
     */
    private function parse()
    {
        foreach ($this->headers as $header) {
            if (!is_string($header)) {
                // Not a raw header line
                continue;
            }
            $parts = explode(':', mb_strtolower($header), 2);
            if (count($parts) < 2 || trim($parts[0]) !== self::HEADER_RULE_IDENTIFIER) {
                // Header is not a rule
                continue;
            }
            $this->currentRule = trim($parts[1]);
            $this->detectDirectives();
        }
    }

    /**
     * Detect directives in rule
     *
     * The rule is split on commas. Only the first segment may start with a
     * "useragent:" prefix; when it does, that user-agent applies to every
     * directive in the rule. Segments whose name is not listed by
     * directiveArray() are ignored.
     *
     * @return void
     */
    private function detectDirectives()
    {
        $directives = $this->directiveArray();
        foreach (explode(',', $this->currentRule) as $index => $rule) {
            $pair = array_map('trim', explode(':', $rule, 2));
            if ($index === 0 && count($pair) === 2 && !in_array($pair[0], $directives, true)) {
                // "name: rest" where name is no directive => user-agent prefix
                $this->currentUserAgent = $pair[0];
                $pair = array_map('trim', explode(':', $pair[1], 2));
            }
            if (in_array($pair[0], $directives, true)) {
                $this->currentDirective = $pair[0];
                $this->currentValue = isset($pair[1]) ? $pair[1] : '';
                $this->addRule();
            }
        }
        $this->cleanup();
    }

    /**
     * Directives supported
     *
     * @return array
     */
    protected function directiveArray()
    {
        return [
            self::DIRECTIVE_ALL,
            self::DIRECTIVE_NONE,
            self::DIRECTIVE_NO_ARCHIVE,
            self::DIRECTIVE_NO_FOLLOW,
            self::DIRECTIVE_NO_IMAGE_INDEX,
            self::DIRECTIVE_NO_INDEX,
            self::DIRECTIVE_NO_ODP,
            self::DIRECTIVE_NO_SNIPPET,
            self::DIRECTIVE_NO_TRANSLATE,
            self::DIRECTIVE_UNAVAILABLE_AFTER
        ];
    }

    /**
     * Add rule
     *
     * Merges the array produced by the directive object for the current
     * directive/value into the rules of the current user-agent.
     *
     * @return void
     */
    private function addRule()
    {
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        $directive = new directive($this->currentDirective, $this->currentValue);
        $this->rules[$this->currentUserAgent] = array_merge(
            $this->rules[$this->currentUserAgent],
            $directive->getArray()
        );
    }

    /**
     * Cleanup before next rule is read
     *
     * @return void
     */
    private function cleanup()
    {
        $this->currentRule = '';
        $this->currentUserAgent = self::USERAGENT_DEFAULT;
        $this->currentDirective = '';
        $this->currentValue = '';
    }

    /**
     * Return all applicable rules
     *
     * Rules stored for the default user-agent are merged first, then the
     * rules of the matched user-agent are merged on top of them.
     *
     * @return array
     */
    public function getRules()
    {
        $rules = [];
        // Default UserAgent
        if (isset($this->rules[self::USERAGENT_DEFAULT])) {
            $rules = array_merge($rules, $this->rules[self::USERAGENT_DEFAULT]);
        }
        // Matching UserAgent
        if (isset($this->rules[$this->userAgent])) {
            $rules = array_merge($rules, $this->rules[$this->userAgent]);
        }
        // Result
        return $rules;
    }

    /**
     * Export all rules for all UserAgents
     *
     * @return array rules keyed by user-agent
     */
    public function export()
    {
        return $this->rules;
    }
}