Completed
Push — master ( fda467...a7ec5f )
by Jan-Petter
02:10
created

Client::setUserAgent()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 12
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
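Metric | Value
c (changes) | 1
b (bugs) | 0
f (features) | 0
dl (duplicated lines) | 0
loc (total lines) | 12
rs (presumably rating score) | 9.4285
cc (conditions) | 3
eloc (code lines) | 8
nc (paths) | 2
nop (presumably number of parameters) | 1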
1
<?php
2
namespace vipnytt\RobotsTxtParser;
3
4
use vipnytt\UserAgentParser;
5
6
class Client implements RobotsTxtInterface
7
{
8
    use UrlToolbox;
9
10
    protected $rules = [];
11
12
    protected $url = '';
13
    protected $statusCode = 200;
14
15
    protected $userAgent = self::USER_AGENT;
16
17
    /**
     * Parse robots.txt content and keep the exported rule set.
     *
     * If no encoding is supplied it is guessed from the content with
     * mb_detect_encoding(). Parsing itself is delegated to Parser, and
     * the result of its export() becomes this client's rules.
     *
     * @param string $content - file content
     * @param string|null $encoding - character encoding, null to auto-detect
     * @param integer|null $byteLimit - maximum of bytes to parse
     * @param integer|null $maxRuleLength - max length of each rule
     */
    public function __construct($content, $encoding = null, $byteLimit = self::BYTE_LIMIT, $maxRuleLength = self::MAX_LENGTH_RULE)
    {
        $charset = ($encoding === null) ? mb_detect_encoding($content) : $encoding;
        $parser = new Parser($content, $charset, $byteLimit, $maxRuleLength);
        $this->rules = $parser->export();
    }
33
34
    /**
     * Set the origin of the parsed robots.txt.
     *
     * The URL is run through urlEncode() and then checked with
     * urlValidate(); the encoded form is the one that gets stored.
     *
     * @param string $url - origin URL
     * @param integer $statusCode - status code stored alongside the URL
     * @return void
     * @throws Exceptions\ClientException if the encoded URL does not validate
     */
    public function setOrigin($url, $statusCode)
    {
        $encoded = $this->urlEncode($url);
        if ($this->urlValidate($encoded)) {
            $this->url = $encoded;
            $this->statusCode = $statusCode;
            return;
        }
        throw new Exceptions\ClientException('Invalid URL');
    }
42
43
    /**
     * Select the user-agent group used for subsequent rule checks.
     *
     * Falls back to self::USER_AGENT when the given value is empty or
     * when the rules hold no user-agent section. Otherwise the value is
     * handed to UserAgentParser and whatever its match() returns for the
     * available group names is stored.
     *
     * @param string|null $userAgent
     * @return void
     */
    public function setUserAgent($userAgent)
    {
        $hasGroups = isset($this->rules[self::DIRECTIVE_USER_AGENT]);
        if (!empty($userAgent) && $hasGroups) {
            $groupNames = array_keys($this->rules[self::DIRECTIVE_USER_AGENT]);
            $matcher = new UserAgentParser($userAgent);
            $this->userAgent = $matcher->match($groupNames);
            return;
        }
        $this->userAgent = self::USER_AGENT;
    }
61
62
    /**
     * Check whether an URL is allowed.
     *
     * The URL is reduced to a path with getPath() and then evaluated by
     * checkRules() from the point of view of the Allow directive.
     *
     * @param  string $url - url to check
     * @return bool
     * @throws Exceptions\ClientException propagated from getPath()
     */
    public function isAllowed($url)
    {
        $path = $this->getPath($url);

        return $this->checkRules(self::DIRECTIVE_ALLOW, $path);
    }
72
73
    /**
     * Check rules
     *
     * Starts from "allowed" (true when asked about Allow, false when
     * asked about Disallow). Disallow rules of the current user-agent
     * group are evaluated first, Allow rules second; every matching rule
     * overwrites the result with whether its directive equals $type, so
     * the last match decides.
     *
     * @param  string $type - rule to check
     * @param  string $path - path to check
     * @return bool
     */
    protected function checkRules($type, $path)
    {
        // Nothing matched yet: allowed by default
        $verdict = ($type === self::DIRECTIVE_ALLOW);
        $evaluationOrder = [self::DIRECTIVE_DISALLOW, self::DIRECTIVE_ALLOW];
        foreach ($evaluationOrder as $directive) {
            if (isset($this->rules[self::DIRECTIVE_USER_AGENT][$this->userAgent][$directive])) {
                $group = $this->rules[self::DIRECTIVE_USER_AGENT][$this->userAgent][$directive];
                foreach ($group as $ruleType => $values) {
                    if ($this->checkRuleSwitch($ruleType, $path, $values)) {
                        $verdict = ($type === $directive);
                    }
                }
            }
        }
        return $verdict;
    }
97
98
    /**
     * Check rule switch
     *
     * Dispatches a rule to the matching checker: Clean-param and Host
     * rules have dedicated handlers, every other rule type is treated as
     * a list of path patterns.
     *
     * @param  string $type - directive or part of an url
     * @param  string $path
     * @param  array $array
     * @return bool
     */
    private function checkRuleSwitch($type, $path, $array)
    {
        switch ($type) {
            case self::DIRECTIVE_CLEAN_PARAM:
                return $this->checkCleanParamRule($path, $array);
            case self::DIRECTIVE_HOST:
                return $this->checkHostRule($array);
            default:
                return $this->checkRulePaths($path, $array);
        }
    }
116
117
    /**
     * Check Clean-Param rule
     *
     * A rule matches when the path contains the parameter as "?name=" or
     * "&name=" and either no paths are attached to the parameter or
     * checkRulePaths() accepts the path against those attached paths.
     *
     * @param  string $path
     * @param  array $array - parameter name => list of paths
     * @return bool
     */
    protected function checkCleanParamRule($path, $array)
    {
        foreach ($array as $param => $paths) {
            // Compare against false explicitly: mb_strpos() returns 0 for a
            // match at the very start, which a truthiness test would miss.
            $hasParam = mb_strpos($path, "?$param=") !== false
                || mb_strpos($path, "&$param=") !== false;
            if (!$hasParam) {
                continue;
            }
            if (empty($paths) || $this->checkRulePaths($path, $paths)) {
                return true;
            }
        }
        return false;
    }
141
142
    /**
     * Check basic rule
     *
     * Each entry is used as the body of a regular expression (wrapped in
     * '#' delimiters) and tested against the path. An entry containing
     * '$' only counts as a match when its length minus one equals the
     * length of the path; entries without '$' match as soon as the
     * expression does.
     *
     * @param  string $path
     * @param  array $array - list of path patterns
     * @return bool
     */
    protected function checkRulePaths($path, $array)
    {
        foreach ($array as $pattern) {
            if (!preg_match('#' . $pattern . '#', $path)) {
                continue;
            }
            if (mb_strpos($pattern, '$') === false) {
                return true;
            }
            // Pattern carries an end marker: lengths have to line up too
            if (mb_strlen($pattern) - 1 == mb_strlen($path)) {
                return true;
            }
        }
        return false;
    }
164
165
    /**
     * Check Host rule
     *
     * Compares the first Host value (URL-encoded and lower-cased) with
     * the origin set through setOrigin(), accepting the forms "host",
     * "host:port", "scheme://host" and "scheme://host:port". When the
     * origin URL carries no explicit port, the port is looked up from
     * the scheme with getservbyname().
     *
     * @param  array $array - Host values, only index 0 is used
     * @return bool false when no origin is set, no Host value exists or
     *              the origin cannot be split into scheme and host
     */
    protected function checkHostRule($array)
    {
        // $url defaults to '', so isset() could never detect a missing origin
        if (empty($this->url) || !isset($array[0])) {
            return false;
        }
        $host = mb_strtolower($this->urlEncode($array[0]));
        $scheme = parse_url($this->url, PHP_URL_SCHEME);
        $urlHost = parse_url($this->url, PHP_URL_HOST);
        if (!is_string($scheme) || !is_string($urlHost)) {
            return false;
        }
        // Scheme and host are case-insensitive; the directive value was
        // lower-cased above, so the origin parts must be as well
        $scheme = mb_strtolower($scheme);
        $urlHost = mb_strtolower($urlHost);
        $port = parse_url($this->url, PHP_URL_PORT);
        if (!is_int($port)) {
            $port = getservbyname($scheme, 'tcp');
        }
        $cases = [
            $urlHost,
            $urlHost . ':' . $port,
            $scheme . '://' . $urlHost,
            $scheme . '://' . $urlHost . ':' . $port
        ];
        return in_array($host, $cases, true);
    }
193
194
    /**
     * Get path
     *
     * Input that already starts with '/' is returned as is (after
     * URL-encoding). Absolute URLs are validated and reduced to their
     * path plus query string, so both input forms yield the same kind of
     * value. An URL without a path component resolves to '/'.
     *
     * @param string $url
     * @return string
     * @throws Exceptions\ClientException if the URL is not a path and does not validate
     */
    protected function getPath($url)
    {
        $url = $this->urlEncode($url);
        if (mb_strpos($url, '/') === 0) {
            // URL already is a path
            return $url;
        }
        if (!$this->urlValidate($url)) {
            throw new Exceptions\ClientException('Invalid URL');
        }
        // parse_url() yields null for a missing component and false for
        // malformed input; neither may leak out as the "path"
        $path = parse_url($url, PHP_URL_PATH);
        if (!is_string($path) || $path === '') {
            $path = '/';
        }
        $query = parse_url($url, PHP_URL_QUERY);
        return is_string($query) ? $path . '?' . $query : $path;
    }
213
214
    /**
     * Check whether an URL is disallowed.
     *
     * The URL is reduced to a path with getPath() and then evaluated by
     * checkRules() from the point of view of the Disallow directive.
     *
     * @param  string $url - url to check
     * @return bool
     * @throws Exceptions\ClientException propagated from getPath()
     */
    public function isDisallowed($url)
    {
        $path = $this->getPath($url);

        return $this->checkRules(self::DIRECTIVE_DISALLOW, $path);
    }
224
225
    /**
     * Get sitemaps
     *
     * @return array the stored Sitemap entries, or an empty array when
     *               none are present
     */
    public function getSitemaps()
    {
        // empty() on the element itself tolerates a missing key; assigning
        // it to a variable first would trigger an undefined-index error
        if (empty($this->rules[self::DIRECTIVE_SITEMAP])) {
            return [];
        }
        return $this->rules[self::DIRECTIVE_SITEMAP];
    }
237
238
    /**
     * Get host
     *
     * @return string|null the first stored Host value, or null when there
     *                     is none
     */
    public function getHost()
    {
        // empty() on the element itself tolerates a missing key or offset;
        // assigning it to a variable first would trigger an error
        if (empty($this->rules[self::DIRECTIVE_HOST][0])) {
            return null;
        }
        return $this->rules[self::DIRECTIVE_HOST][0];
    }
250
251
    /**
     * Get Clean-param
     *
     * @return array the stored Clean-param entries, or an empty array
     *               when none are present
     */
    public function getCleanParam()
    {
        // empty() on the element itself tolerates a missing key; assigning
        // it to a variable first would trigger an undefined-index error
        if (empty($this->rules[self::DIRECTIVE_CLEAN_PARAM])) {
            return [];
        }
        return $this->rules[self::DIRECTIVE_CLEAN_PARAM];
    }
263
264
    /**
     * Get CacheDelay
     *
     * Returns the stored Cache-delay value. When it is missing or empty,
     * either the Crawl-delay (see getCrawlDelay()) or null is returned,
     * depending on $fallback.
     *
     * @param bool $fallback return Crawl-delay if not found
     * @return int|float|null
     */
    public function getCacheDelay($fallback = true)
    {
        // empty() on the element itself tolerates a missing key; assigning
        // it to a variable first would trigger an undefined-index error
        if (empty($this->rules[self::DIRECTIVE_CACHE_DELAY])) {
            return ($fallback) ? $this->getCrawlDelay() : null;
        }
        return $this->rules[self::DIRECTIVE_CACHE_DELAY];
    }
277
278
    /**
     * Get CrawlDelay
     *
     * @return int|float the stored Crawl-delay value, or 0 when it is
     *                   missing or empty
     */
    public function getCrawlDelay()
    {
        // empty() on the element itself tolerates a missing key; assigning
        // it to a variable first would trigger an undefined-index error
        if (empty($this->rules[self::DIRECTIVE_CRAWL_DELAY])) {
            return 0;
        }
        return $this->rules[self::DIRECTIVE_CRAWL_DELAY];
    }
290
}
291