Passed
Branch master (ccb2e2)
by Jan-Petter
02:38
created

XRobotsTagParser::useHeaders()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 11
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 11
rs 9.2
cc 4
eloc 8
nc 3
nop 1
1
<?php
2
/**
3
 * X-Robots-Tag HTTP header parser class
4
 *
5
 * @author VIP nytt ([email protected])
6
 * @author Jan-Petter Gundersen ([email protected])
7
 *
8
 * Project:
9
 * @link https://github.com/VIPnytt/X-Robots-Tag-parser
10
 * @license https://opensource.org/licenses/MIT MIT license
11
 *
12
 * Specification:
13
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header
14
 */
15
16
namespace vipnytt;
17
18
use vipnytt\XRobotsTagParser\directive;
19
use vipnytt\XRobotsTagParser\URLParser;
20
use vipnytt\XRobotsTagParser\UserAgentParser;
21
22
/**
 * Parses `X-Robots-Tag` HTTP response headers into per-User-Agent rule sets.
 *
 * The headers are either supplied through `$options['headers']` or fetched
 * from the URL with get_headers(). Every `X-Robots-Tag` header is split into
 * comma separated directives; an optional leading `<user-agent>:` prefix on
 * the first directive scopes the whole header to that User-Agent.
 */
class XRobotsTagParser
{
    const HEADER_RULE_IDENTIFIER = 'x-robots-tag';
    const USERAGENT_DEFAULT = '';

    const DIRECTIVE_ALL = 'all';
    const DIRECTIVE_NONE = 'none';
    const DIRECTIVE_NO_ARCHIVE = 'noarchive';
    const DIRECTIVE_NO_FOLLOW = 'nofollow';
    const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex';
    const DIRECTIVE_NO_INDEX = 'noindex';
    const DIRECTIVE_NO_ODP = 'noodp';
    const DIRECTIVE_NO_SNIPPET = 'nosnippet';
    const DIRECTIVE_NO_TRANSLATE = 'notranslate';
    const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after';

    // Encoded URL the headers belong to, and the User-Agent group selected for it
    private $url = '';
    private $userAgent = self::USERAGENT_DEFAULT;

    // Raw header lines plus the parser state for the header currently being read
    private $headers = [];
    private $currentRule = '';
    private $currentUserAgent = self::USERAGENT_DEFAULT;
    private $currentDirective = '';
    private $currentValue = '';

    // Caller supplied options, and the parsed rules keyed by User-Agent
    private $options = [];
    private $rules = [];

    /**
     * Constructor
     *
     * Validates and encodes the URL, obtains the headers, parses them and
     * finally selects the best matching User-Agent group among the parsed ones.
     *
     * @param string $url
     * @param string $userAgent
     * @param array $options Optional settings; `headers` may hold pre-fetched header lines
     */
    public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, $options = [])
    {
        // Parse URL
        $urlParser = new URLParser(trim($url));
        if (!$urlParser->isValid()) {
            trigger_error('Invalid URL', E_USER_WARNING);
        }
        // Encode URL
        $this->url = $urlParser->encode();
        // Set any optional options
        $this->options = $options;
        // Get headers (on failure the header list is left empty, so parsing is a no-op)
        $this->getHeaders();
        // Parse rules
        $this->parse();
        // Set User-Agent
        $parser = new UserAgentParser($userAgent);
        $this->userAgent = $parser->match(array_keys($this->rules), self::USERAGENT_DEFAULT);
    }

    /**
     * Request HTTP headers
     *
     * Uses `$options['headers']` when provided, otherwise fetches the headers
     * of the URL. The header list is always left as an array, even on failure,
     * so that later iteration is safe when an error handler lets execution
     * continue after the E_USER_ERROR.
     *
     * @return bool True when headers are available, false when fetching failed
     */
    private function getHeaders()
    {
        if (isset($this->options['headers'])) {
            // A single header given as a plain string is wrapped into a list
            $this->headers = (array)$this->options['headers'];
            return true;
        }
        $fetched = get_headers($this->url);
        if ($fetched === false) {
            // Reset before raising the error; never leave `false` behind
            $this->headers = [];
            trigger_error('Unable to fetch HTTP headers', E_USER_ERROR);
            return false;
        }
        $this->headers = $fetched;
        return true;
    }

    /**
     * Parse HTTP headers
     *
     * Every header line named `x-robots-tag` (case-insensitive) is handed to
     * the directive detection; all other entries are ignored.
     *
     * @return void
     */
    private function parse()
    {
        foreach ($this->headers as $header) {
            if (!is_string($header)) {
                // Nested arrays or other non-string entries cannot be header lines
                continue;
            }
            $parts = explode(':', mb_strtolower($header), 2);
            if (count($parts) < 2 || trim($parts[0]) !== self::HEADER_RULE_IDENTIFIER) {
                // Header is not a rule
                continue;
            }
            $this->currentRule = trim($parts[1]);
            $this->detectDirectives();
        }
    }

    /**
     * Detect directives in rule
     *
     * Splits the current rule on commas. When the first segment starts with
     * `<name>:` and `<name>` is not a known directive, it is taken as the
     * User-Agent the whole rule applies to. Each recognised directive is then
     * stored together with its optional value.
     *
     * @return void
     */
    private function detectDirectives()
    {
        // Resolved once per rule; subclasses may override directiveArray()
        $supported = $this->directiveArray();
        $segments = explode(',', $this->currentRule);
        foreach ($segments as $position => $segment) {
            $pair = array_map('trim', explode(':', $segment, 2));
            // Only the very first segment may carry a User-Agent prefix
            if ($position === 0 && count($pair) === 2 && !in_array($pair[0], $supported, true)) {
                $this->currentUserAgent = $pair[0];
                $pair = array_map('trim', explode(':', $pair[1], 2));
            }
            if (in_array($pair[0], $supported, true)) {
                $this->currentDirective = $pair[0];
                $this->currentValue = isset($pair[1]) ? $pair[1] : '';
                $this->addRule();
            }
        }
        $this->cleanup();
    }

    /**
     * Directives supported
     *
     * @return array
     */
    protected function directiveArray()
    {
        return [
            self::DIRECTIVE_ALL,
            self::DIRECTIVE_NONE,
            self::DIRECTIVE_NO_ARCHIVE,
            self::DIRECTIVE_NO_FOLLOW,
            self::DIRECTIVE_NO_IMAGE_INDEX,
            self::DIRECTIVE_NO_INDEX,
            self::DIRECTIVE_NO_ODP,
            self::DIRECTIVE_NO_SNIPPET,
            self::DIRECTIVE_NO_TRANSLATE,
            self::DIRECTIVE_UNAVAILABLE_AFTER
        ];
    }

    /**
     * Add rule
     *
     * Merges the current directive into the rule set of the current
     * User-Agent, creating that rule set on first use.
     *
     * @return void
     */
    private function addRule()
    {
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        $directive = new directive($this->currentDirective, $this->currentValue, $this->options);
        $this->rules[$this->currentUserAgent] = array_merge(
            $this->rules[$this->currentUserAgent],
            $directive->getArray()
        );
    }

    /**
     * Cleanup before next rule is read
     *
     * Resets the per-header parser state so that a User-Agent prefix from one
     * header does not carry over to the next one.
     *
     * @return void
     */
    private function cleanup()
    {
        $this->currentRule = '';
        $this->currentUserAgent = self::USERAGENT_DEFAULT;
        $this->currentDirective = '';
        $this->currentValue = '';
    }

    /**
     * Return all applicable rules
     *
     * The rules of the default User-Agent are merged first, then the rules of
     * the matched User-Agent are merged on top of them.
     *
     * @return array
     */
    public function getRules()
    {
        $rules = [];
        // Default UserAgent
        if (isset($this->rules[self::USERAGENT_DEFAULT])) {
            $rules = array_merge($rules, $this->rules[self::USERAGENT_DEFAULT]);
        }
        // Matching UserAgent
        if (isset($this->rules[$this->userAgent])) {
            $rules = array_merge($rules, $this->rules[$this->userAgent]);
        }
        // Result
        return $rules;
    }

    /**
     * Export all rules for all UserAgents
     *
     * @return array Parsed rules, keyed by User-Agent
     */
    public function export()
    {
        return $this->rules;
    }
}