Completed
Push — master ( 5753af...41dae5 )
by Jan-Petter
06:59
created

XRobotsTagParser::getDirectiveMeaning()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 12
rs 9.4285
cc 3
eloc 8
nc 3
nop 1
1
<?php
2
namespace vipnytt;
3
4
/**
5
 * X-Robots-Tag HTTP header parser class
6
 *
7
 * @author VIP nytt ([email protected])
8
 * @author Jan-Petter Gundersen ([email protected])
9
 *
10
 * Project:
11
 * @link https://github.com/VIPnytt/X-Robots-Tag-parser
12
 * @license https://opensource.org/licenses/MIT MIT license
13
 *
14
 * Specification:
15
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header
16
 */
17
18
use GuzzleHttp;
19
use vipnytt\XRobotsTagParser\Exceptions\XRobotsTagParserException;
20
use vipnytt\XRobotsTagParser\Rebuild;
21
use vipnytt\XRobotsTagParser\URLParser;
22
use vipnytt\XRobotsTagParser\UserAgentParser;
23
24
/**
 * Parser for the X-Robots-Tag HTTP header.
 *
 * Reads X-Robots-Tag header lines (either supplied through
 * `$config['headers']` or requested from the URL with a HEAD request),
 * splits them into directives, groups the directives per user-agent and
 * exposes the rules that apply to the requested user-agent.
 */
class XRobotsTagParser
{
    /** Lower-cased name of the header that carries the rules */
    const HEADER_RULE_IDENTIFIER = 'x-robots-tag';

    /** Key under which rules without a user-agent prefix are stored */
    const USERAGENT_DEFAULT = '';

    const DIRECTIVE_ALL = 'all';
    const DIRECTIVE_NONE = 'none';
    const DIRECTIVE_NO_ARCHIVE = 'noarchive';
    const DIRECTIVE_NO_FOLLOW = 'nofollow';
    const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex';
    const DIRECTIVE_NO_INDEX = 'noindex';
    const DIRECTIVE_NO_ODP = 'noodp';
    const DIRECTIVE_NO_SNIPPET = 'nosnippet';
    const DIRECTIVE_NO_TRANSLATE = 'notranslate';
    const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after';

    /** @var string Encoded URL the headers belong to */
    protected $url = '';

    /**
     * @var string User-agent. Holds the requested user-agent while the
     *             headers are parsed, and the user-agent group matched
     *             against the parsed rules once construction has finished.
     */
    protected $userAgent = self::USERAGENT_DEFAULT;

    /** @var array Optional configuration ('headers' and 'guzzle' keys are read) */
    protected $config = [];

    /** @var array HTTP headers to parse */
    protected $headers = [];

    /** @var string Value of the header currently being processed */
    protected $currentRule = '';

    /** @var string User-agent the current header value applies to */
    protected $currentUserAgent = self::USERAGENT_DEFAULT;

    /** @var array Parsed rules: user-agent => [directive => value] */
    protected $rules = [];

    /**
     * Constructor
     *
     * @param string $url       URL the headers belong to
     * @param string $userAgent User-agent to resolve the rules for
     * @param array  $config    Optional settings: 'headers' (array of headers
     *                          to parse instead of requesting them) and
     *                          'guzzle' (request options for the HEAD request)
     * @throws XRobotsTagParserException If the URL is invalid or the request fails
     */
    public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $config = [])
    {
        // Parse URL
        $urlParser = new URLParser(trim($url));
        if (!$urlParser->isValid()) {
            throw new XRobotsTagParserException('Invalid URL');
        }
        // Encode URL
        $this->url = $urlParser->encode();
        // Set any optional configuration options
        $this->config = $config;
        if (isset($this->config['headers']) && is_array($this->config['headers'])) {
            $this->headers = $this->config['headers'];
        }
        // getHeaders() falls back to $this->userAgent for the request's
        // User-Agent header, so the requested value has to be available
        // before parsing starts; it is replaced by the matched group below.
        $this->userAgent = $userAgent;
        // Parse rules
        $this->parse();
        // Resolve the user-agent group that matches the requested user-agent
        $parser = new UserAgentParser($userAgent);
        $this->userAgent = $parser->match(array_keys($this->rules), self::USERAGENT_DEFAULT);
    }

    /**
     * Parse HTTP headers
     *
     * Requests the headers when none were supplied, then feeds the value
     * of every X-Robots-Tag header to detectDirectives().
     *
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function parse()
    {
        if (empty($this->headers)) {
            $fetched = $this->getHeaders();
            // Only overwrite with an actual result, so an overriding
            // implementation that fills $this->headers itself keeps working.
            if (is_array($fetched)) {
                $this->headers = $fetched;
            }
        }
        foreach ($this->headerLines($this->headers) as $header) {
            $parts = array_map('trim', explode(':', mb_strtolower($header), 2));
            if (count($parts) < 2 || $parts[0] !== self::HEADER_RULE_IDENTIFIER) {
                // Header is not a rule
                continue;
            }
            $this->currentRule = $parts[1];
            $this->detectDirectives();
        }
    }

    /**
     * Flatten headers into "Name: value" lines
     *
     * Accepts both a list of raw header lines and a map of
     * header name => value (or list of values), so that caller supplied
     * headers and the map returned by the HTTP client are handled alike.
     *
     * @param array $headers
     * @return string[]
     */
    private function headerLines(array $headers)
    {
        $lines = [];
        foreach ($headers as $name => $values) {
            foreach ((array)$values as $value) {
                if (!is_scalar($value)) {
                    // Nothing that can be interpreted as a header value
                    continue;
                }
                // String keys are header names, integer keys mean the
                // value already is a complete header line.
                $lines[] = is_string($name) ? $name . ': ' . $value : (string)$value;
            }
        }
        return $lines;
    }

    /**
     * Request the HTTP headers from an URL
     *
     * Sends a HEAD request using the options in `$config['guzzle']`. The
     * User-Agent header defaults to `$this->userAgent` when not configured.
     *
     * @return array Response headers as returned by the HTTP client
     * @throws XRobotsTagParserException
     */
    protected function getHeaders()
    {
        if (!filter_var($this->url, FILTER_VALIDATE_URL)) {
            throw new XRobotsTagParserException('Passed URL not valid according to the filter_var function');
        }
        try {
            if (!isset($this->config['guzzle']['headers']['User-Agent'])) {
                $this->config['guzzle']['headers']['User-Agent'] = $this->userAgent;
            }
            $client = new GuzzleHttp\Client();
            $res = $client->head($this->url, $this->config['guzzle']);
            return $res->getHeaders();
        } catch (GuzzleHttp\Exception\TransferException $e) {
            throw new XRobotsTagParserException($e->getMessage());
        }
    }

    /**
     * Detect directives in rule
     *
     * Splits the current rule on commas. When the first element has the
     * form "name: rest" and the name is not a known directive, the name is
     * taken as the user-agent the rule applies to.
     *
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function detectDirectives()
    {
        $classes = $this->directiveClasses();
        $directives = array_map('trim', explode(',', $this->currentRule));
        $pair = array_map('trim', explode(':', $directives[0], 2));
        if (count($pair) === 2 && !isset($classes[$pair[0]])) {
            $this->currentUserAgent = $pair[0];
            $directives[0] = $pair[1];
        }
        foreach ($directives as $rule) {
            // Only the part before an optional ':' names the directive
            $directive = trim(explode(':', $rule, 2)[0]);
            if (isset($classes[$directive])) {
                $this->addRule($classes[$directive]);
            }
        }
        $this->cleanup();
    }

    /**
     * Array of directives and their class names
     *
     * @return array Directive keyword => directive class name
     */
    protected function directiveClasses()
    {
        return [
            self::DIRECTIVE_ALL => 'All',
            self::DIRECTIVE_NO_ARCHIVE => 'NoArchive',
            self::DIRECTIVE_NO_FOLLOW => 'NoFollow',
            self::DIRECTIVE_NO_IMAGE_INDEX => 'NoImageIndex',
            self::DIRECTIVE_NO_INDEX => 'NoIndex',
            self::DIRECTIVE_NONE => 'None',
            self::DIRECTIVE_NO_ODP => 'NoODP',
            self::DIRECTIVE_NO_SNIPPET => 'NoSnippet',
            self::DIRECTIVE_NO_TRANSLATE => 'NoTranslate',
            self::DIRECTIVE_UNAVAILABLE_AFTER => 'UnavailableAfter',
        ];
    }

    /**
     * Add rule
     *
     * Instantiates the directive class with the current rule and stores
     * its directive/value pair for the current user-agent.
     *
     * @param string $directive Directive class name, as listed by directiveClasses()
     * @return void
     * @throws XRobotsTagParserException
     */
    protected function addRule($directive)
    {
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        $object = $this->createDirective($directive, $this->currentRule);
        $this->rules[$this->currentUserAgent] = array_merge(
            $this->rules[$this->currentUserAgent],
            [$object->getDirective() => $object->getValue()]
        );
    }

    /**
     * Instantiate a directive class
     *
     * A class name held in a string is always resolved from the global
     * namespace, so the name is built from __CLASS__ to end up in the same
     * namespace the interface check below resolves to.
     *
     * @param string $className Directive class name, as listed by directiveClasses()
     * @param string $rule      Constructor argument for the directive class
     * @return XRobotsTagParser\directives\directiveInterface
     * @throws XRobotsTagParserException If the class is missing or does not implement the interface
     */
    private function createDirective($className, $rule)
    {
        $class = __CLASS__ . '\\directives\\' . $className;
        if (!class_exists($class)) {
            throw new XRobotsTagParserException('Unsupported directive class');
        }
        $object = new $class($rule);
        if (!$object instanceof XRobotsTagParser\directives\directiveInterface) {
            throw new XRobotsTagParserException('Unsupported directive class');
        }
        return $object;
    }

    /**
     * Cleanup before next rule is read
     *
     * @return void
     */
    protected function cleanup()
    {
        $this->currentRule = '';
        $this->currentUserAgent = self::USERAGENT_DEFAULT;
    }

    /**
     * Return all applicable rules
     *
     * Rules of the default user-agent are merged with, and overridden by,
     * the rules of the matching user-agent.
     *
     * @param bool $raw Return the merged rules as parsed, without passing them through Rebuild
     * @return array
     */
    public function getRules($raw = false)
    {
        $rules = [];
        // Default UserAgent
        if (isset($this->rules[self::USERAGENT_DEFAULT])) {
            $rules = array_merge($rules, $this->rules[self::USERAGENT_DEFAULT]);
        }
        // Matching UserAgent
        if (isset($this->rules[$this->userAgent])) {
            $rules = array_merge($rules, $this->rules[$this->userAgent]);
        }
        if (!$raw) {
            $rebuild = new Rebuild($rules);
            $rules = $rebuild->getResult();
        }
        // Result
        return $rules;
    }

    /**
     * Export all rules for all UserAgents
     *
     * @return array User-agent => [directive => value]
     */
    public function export()
    {
        return $this->rules;
    }

    /**
     * Get the meaning of an Directive
     *
     * @param string $directive Directive keyword, one of the DIRECTIVE_* constants
     * @return string
     * @throws XRobotsTagParserException If the directive is unknown or its class is unsupported
     */
    public function getDirectiveMeaning($directive)
    {
        $classes = $this->directiveClasses();
        if (!is_string($directive) || !isset($classes[$directive])) {
            throw new XRobotsTagParserException('Unknown directive');
        }
        // The keyword (e.g. 'unavailable_after') differs from the class
        // name (e.g. 'UnavailableAfter'); always go through the map.
        return $this->createDirective($classes[$directive], $directive)->getMeaning();
    }
}
253