Completed
Push — master ( e06ed9...e8e631 )
by Jan-Petter
02:04
created

Toolbox::checkPath()   D

Complexity

Conditions 9
Paths 25

Size

Total Lines 43
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 5
Bugs 2 Features 0
Metric Value
c 5
b 2
f 0
dl 0
loc 43
rs 4.909
cc 9
eloc 18
nc 25
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ParserException;
5
6
/**
 * Trait Toolbox
 *
 * Shared helpers for matching paths against robots.txt rules, splitting a
 * raw line into a directive/value pair, and validating directive names.
 *
 * @package vipnytt\RobotsTxtParser\Parser
 */
trait Toolbox
{
    /**
     * Check whether a path is matched by at least one rule.
     *
     * Each rule is matched from the beginning of the path. Inside a rule,
     * "*" matches any sequence of characters (including none) and a trailing
     * "$" anchors the rule to the end of the path. Every other character is
     * matched literally.
     *
     * Related bug report for the previous preg_match based approach:
     * @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/62
     *
     * Alternative approaches that were considered:
     * @link https://github.com/diggin/Diggin_RobotRules
     * @link https://github.com/hafriedlander/php-peg
     *
     * @param string $path
     * @param array $paths List of rule strings
     * @return bool True if any rule matches the path, false otherwise
     */
    protected function checkPath($path, $paths)
    {
        $path = (string)$path;
        foreach ($paths as $rule) {
            $rule = (string)$rule;
            // Only a "$" in the very last position is an end anchor; anywhere
            // else it is an ordinary character and gets quoted below.
            $isEndAnchored = substr($rule, -1) === '$';
            if ($isEndAnchored) {
                // Cast because substr() may return false on older PHP versions
                $rule = (string)substr($rule, 0, -1);
            }
            // Quote every regex metacharacter (including the "#" delimiter),
            // then re-enable "*" as the only supported wildcard.
            $pattern = str_replace('\*', '.*', preg_quote($rule, '#'));
            $regex = '#^' . $pattern . ($isEndAnchored ? '\z' : '') . '#';
            // preg_match() returns 0 on no match and false on an engine
            // error; both are treated as "rule does not match".
            if (preg_match($regex, $path) === 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Generate directive/rule pair
     *
     * Splits the line at the first colon, trims both parts and lowercases
     * the directive. The pair is only returned if both parts are non-empty
     * strings and the directive is present in the (case-insensitive)
     * white list. A value of "0" is considered a valid, non-empty value.
     *
     * @param string $line
     * @param array $whiteList Supported directive names
     * @return array|false ['directive' => string, 'value' => string], or false if unsupported
     */
    protected function generateRulePair($line, $whiteList)
    {
        // Split by directive and rule, at the first colon only
        $parts = explode(':', (string)$line, 2);
        if (count($parts) !== 2) {
            // No colon, hence no rule
            return false;
        }
        $directive = mb_strtolower(trim($parts[0]));
        $value = trim($parts[1]);
        // Compare against '' explicitly: empty() would also reject "0"
        if ($directive === '' || $value === '') {
            return false;
        }
        $whiteList = array_map('mb_strtolower', $whiteList);
        if (!in_array($directive, $whiteList, true)) {
            // Line does not contain any supported directive
            return false;
        }
        return [
            'directive' => $directive,
            'value' => $value,
        ];
    }

    /**
     * Validate directive
     *
     * The directive must be present in the list of allowed directives,
     * compared strictly (type and case sensitive). The lowercased directive
     * is returned on success.
     *
     * @param string $directive
     * @param array $directives Allowed directives
     * @return string Lowercased directive
     * @throws ParserException If the directive is not in the allowed list
     */
    protected function validateDirective($directive, $directives)
    {
        if (!in_array($directive, $directives, true)) {
            throw new ParserException('Directive is not allowed here');
        }
        return mb_strtolower($directive);
    }
}
107