Completed
Push — master ( 1f7f5f...3bc145 )
by Jan-Petter
06:44
created

XRobotsTagParser::directiveClasses()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 15
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 15
rs 9.4285
cc 1
eloc 12
nc 1
nop 0
1
<?php
2
namespace vipnytt;
3
4
/**
5
 * X-Robots-Tag HTTP header parser class
6
 *
7
 * @author VIP nytt ([email protected])
8
 * @author Jan-Petter Gundersen ([email protected])
9
 *
10
 * Project:
11
 * @link https://github.com/VIPnytt/X-Robots-Tag-parser
12
 * @license https://opensource.org/licenses/MIT MIT license
13
 *
14
 * Specification:
15
 * @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header
16
 */
17
18
use vipnytt\XRobotsTagParser\Rebuild;
19
use vipnytt\XRobotsTagParser\URLParser;
20
use vipnytt\XRobotsTagParser\UserAgentParser;
21
22
class XRobotsTagParser
23
{
24
    const HEADER_RULE_IDENTIFIER = 'x-robots-tag';
25
    const USERAGENT_DEFAULT = '';
26
27
    const DIRECTIVE_ALL = 'all';
28
    const DIRECTIVE_NONE = 'none';
29
    const DIRECTIVE_NO_ARCHIVE = 'noarchive';
30
    const DIRECTIVE_NO_FOLLOW = 'nofollow';
31
    const DIRECTIVE_NO_IMAGE_INDEX = 'noimageindex';
32
    const DIRECTIVE_NO_INDEX = 'noindex';
33
    const DIRECTIVE_NO_ODP = 'noodp';
34
    const DIRECTIVE_NO_SNIPPET = 'nosnippet';
35
    const DIRECTIVE_NO_TRANSLATE = 'notranslate';
36
    const DIRECTIVE_UNAVAILABLE_AFTER = 'unavailable_after';
37
38
    private $url = '';
39
    private $userAgent = self::USERAGENT_DEFAULT;
40
41
    private $headers = [];
42
    private $currentRule = '';
43
    private $currentUserAgent = self::USERAGENT_DEFAULT;
44
45
    private $options = [];
46
    private $rules = [];
47
48
    /**
     * Build the parser for one URL and immediately load and parse its headers.
     *
     * Work is done in this order: the URL is trimmed, validated and encoded;
     * the options are stored; the headers are obtained; the headers are parsed
     * into rules; finally the user-agent group that applies to $userAgent is
     * selected from the groups found while parsing.
     *
     * @param string $url URL whose X-Robots-Tag headers are evaluated
     * @param string $userAgent User-agent to match against the parsed rule groups
     * @param array $options Optional settings; a 'headers' entry is used instead of requesting the headers
     */
    public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, $options = [])
    {
        $urlHandler = new URLParser(trim($url));
        // An invalid URL only raises a warning; construction carries on regardless.
        if (!$urlHandler->isValid()) {
            trigger_error('Invalid URL', E_USER_WARNING);
        }
        $this->url = $urlHandler->encode();

        // Options must be stored before the headers are loaded, because
        // getHeaders() looks for caller-supplied headers in them.
        $this->options = $options;
        $this->getHeaders();
        $this->parse();

        // The user-agent is resolved last: the candidate groups are the keys
        // of the rules collected by parse().
        $knownAgents = array_keys($this->rules);
        $this->userAgent = (new UserAgentParser($userAgent))->match($knownAgents, self::USERAGENT_DEFAULT);
    }
74
75
    /**
     * Load the HTTP headers to parse.
     *
     * Headers supplied through the 'headers' option take precedence; otherwise
     * they are requested from the URL with get_headers().
     *
     * @return bool True when headers were loaded, false when the request failed
     */
    private function getHeaders()
    {
        if (isset($this->options['headers'])) {
            $this->headers = $this->options['headers'];
            return true;
        }
        $fetched = get_headers($this->url);
        if ($fetched === false) {
            // Keep the property an array: if a custom error handler lets
            // execution continue after the error below, parse() still has
            // something iterable to loop over.
            $this->headers = [];
            trigger_error('Unable to fetch HTTP headers', E_USER_ERROR);
            return false;
        }
        $this->headers = $fetched;
        return true;
    }
93
94
    /**
     * Scan the loaded headers and hand every X-Robots-Tag value to the
     * directive detector.
     *
     * Each header is lower-cased and split on the first colon; headers
     * without a colon, or whose name is not the X-Robots-Tag identifier,
     * are ignored.
     *
     * @return void
     */
    private function parse()
    {
        foreach ($this->headers as $line) {
            $segments = explode(':', mb_strtolower($line), 2);
            $segments = array_map('trim', $segments);
            $isRule = count($segments) === 2 && $segments[0] === self::HEADER_RULE_IDENTIFIER;
            if ($isRule) {
                // Everything after the first colon is the rule text.
                $this->currentRule = $segments[1];
                $this->detectDirectives();
            }
        }
    }
112
113
    /**
     * Detect the directives in the current rule and register each known one.
     *
     * The rule is split on commas. If the first part has the form
     * "name: rest" and "name" is not a known directive, "name" is taken as
     * the user-agent the rule applies to and "rest" becomes the first
     * directive. State is reset through cleanup() once the rule is processed.
     *
     * @return void
     */
    private function detectDirectives()
    {
        // Resolve the directive map once instead of rebuilding it for every
        // membership test and lookup.
        $classMap = $this->directiveClasses();
        $directives = array_map('trim', explode(',', $this->currentRule));
        $pair = array_map('trim', explode(':', $directives[0], 2));
        if (count($pair) === 2 && !array_key_exists($pair[0], $classMap)) {
            // Leading "name:" that is not a directive => user-agent prefix.
            $this->currentUserAgent = $pair[0];
            $directives[0] = $pair[1];
        }
        foreach ($directives as $rule) {
            // Only the part before the first colon names the directive;
            // anything after it is the directive's value.
            $directive = trim(explode(':', $rule, 2)[0]);
            if (array_key_exists($directive, $classMap)) {
                $this->addRule($classMap[$directive]);
            }
        }
        $this->cleanup();
    }
134
135
    /**
     * Map every supported directive name to the short name of its class.
     *
     * The keys are the DIRECTIVE_* constants; addRule() appends the values
     * to the directives namespace to obtain the class to instantiate.
     *
     * @return array Directive name => class short name
     */
    protected function directiveClasses()
    {
        $classMap = [
            self::DIRECTIVE_ALL => 'All',
            self::DIRECTIVE_NO_ARCHIVE => 'NoArchive',
            self::DIRECTIVE_NO_FOLLOW => 'NoFollow',
            self::DIRECTIVE_NO_IMAGE_INDEX => 'NoImageIndex',
            self::DIRECTIVE_NO_INDEX => 'NoIndex',
            self::DIRECTIVE_NONE => 'None',
            self::DIRECTIVE_NO_ODP => 'NoODP',
            self::DIRECTIVE_NO_SNIPPET => 'NoSnippet',
            self::DIRECTIVE_NO_TRANSLATE => 'NoTranslate',
            self::DIRECTIVE_UNAVAILABLE_AFTER => 'UnavailableAfter',
        ];

        return $classMap;
    }
155
156
    /**
     * Instantiate a directive class for the current rule and store its
     * directive/value pair under the current user-agent.
     *
     * A class that does not exist, or that does not implement the directive
     * interface, is reported with an E_USER_ERROR and nothing is stored.
     *
     * @param string $directive Short class name, as returned by directiveClasses()
     * @return void
     */
    private function addRule($directive)
    {
        if (!isset($this->rules[$this->currentUserAgent])) {
            $this->rules[$this->currentUserAgent] = [];
        }
        $class = __NAMESPACE__ . "\\XRobotsTagParser\\directives\\$directive";
        if (!class_exists($class)) {
            // Report through the same channel as an invalid class instead of
            // letting `new` die with an engine-level "class not found" error.
            trigger_error('Directive class invalid', E_USER_ERROR);
            return;
        }
        $object = new $class($this->currentRule, $this->options);
        if (!$object instanceof XRobotsTagParser\directives\directiveInterface) {
            trigger_error('Directive class invalid', E_USER_ERROR);
            // A custom error handler may resume execution; never call the
            // interface methods on an object that failed the check.
            return;
        }
        $this->rules[$this->currentUserAgent] = array_merge(
            $this->rules[$this->currentUserAgent],
            [$object->getDirective() => $object->getValue()]
        );
    }
174
175
    /**
     * Reset the per-rule state so the next header starts clean.
     *
     * Puts the current user-agent back to the default and empties the
     * current rule text.
     *
     * @return void
     */
    private function cleanup()
    {
        $this->currentUserAgent = self::USERAGENT_DEFAULT;
        $this->currentRule = '';
    }
185
186
    /**
     * Return the rules that apply to the selected user-agent.
     *
     * The rules of the default group are merged first, then those of the
     * matched user-agent group. Unless $raw is set, the merged rules are
     * passed through Rebuild and its result is returned instead.
     *
     * @param bool $raw When truthy, return the merged rules without rebuilding them
     * @return array
     */
    public function getRules($raw = false)
    {
        $merged = [];
        // Default group first, matched group second; array_merge lets later
        // string keys replace earlier ones.
        foreach ([self::USERAGENT_DEFAULT, $this->userAgent] as $group) {
            if (isset($this->rules[$group])) {
                $merged = array_merge($merged, $this->rules[$group]);
            }
        }
        if ($raw) {
            return $merged;
        }
        // NOTE(review): static analysis flagged this call because Rebuild's
        // constructor is documented as expecting a string while an array is
        // passed here - confirm against Rebuild's docblock.
        $rebuild = new Rebuild($merged);

        return $rebuild->getResult();
    }
210
211
    /**
     * Export every parsed rule, grouped by user-agent.
     *
     * Unlike getRules(), nothing is filtered by user-agent or rebuilt; the
     * internal rule set is returned as collected.
     *
     * @return array User-agent => rules for that user-agent
     */
    public function export()
    {
        $allRules = $this->rules;

        return $allRules;
    }
220
}