Completed
Pull Request — master (#2)
by Jan-Petter
02:43
created

DirectiveParserCommons::draftParseRate()   B

Complexity

Conditions 6
Paths 9

Size

Total Lines 23
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 23
rs 8.5906
cc 6
eloc 19
nc 9
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser\Directives;
3
4
use DateTimeZone;
5
use vipnytt\RobotsTxtParser\Exceptions\ParserException;
6
7
/**
 * Trait DirectiveParserCommons
 *
 * Helpers shared by the directive parsers: path-rule matching, splitting a
 * line into a directive/value pair, and parsing of the rate and time formats
 * from the `Robot exclusion standard` version 2.0 draft.
 *
 * @package vipnytt\RobotsTxtParser\Parser\Directives
 */
trait DirectiveParserCommons
{
    /**
     * Check path rule
     *
     * Returns true as soon as one of the rules matches the path. In a rule,
     * `*` matches any sequence of characters and a trailing `$` anchors the
     * rule to the end of the path; every other character is matched literally.
     *
     * @param string $path
     * @param string[] $paths Raw path rules
     * @return bool
     */
    private function checkPaths($path, array $paths)
    {
        $quote = function ($segment) {
            // '#' is the delimiter used by checkPathsCallback()
            return preg_quote($segment, '#');
        };
        foreach ($paths as $rule) {
            $rule = (string)$rule;
            // Only a trailing `$` is an end anchor, anywhere else it is a literal
            $anchored = substr($rule, -1) === '$';
            if ($anchored) {
                $rule = substr($rule, 0, -1);
            }
            // Quote the literal segments, and re-join them with the regex wildcard
            $pattern = implode('.*', array_map($quote, explode('*', $rule)));
            if ($anchored) {
                $pattern .= '$';
            }
            if ($this->checkPathsCallback($pattern, $path)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Callback for CheckPath
     *
     * Matches the path against a rule which has already been converted to a
     * regular expression fragment. The match is anchored to the beginning of
     * the path, a rule does therefore never match somewhere in the middle.
     *
     * Background, from when the matching was flagged for replacement:
     *
     * Bug report
     * @link https://github.com/t1gor/Robots.txt-Core-Class/issues/62
     *
     * An robots.txt parser, where a bug-fix is planned
     * @link https://github.com/diggin/Diggin_RobotRules
     *
     * PHP PEG (parsing expression grammar), a possible alternative
     * @link https://github.com/hafriedlander/php-peg
     *
     * @param string $rule Regular expression fragment, without delimiters
     * @param string $path
     * @return bool
     */
    private function checkPathsCallback($rule, $path)
    {
        try {
            // `D` makes `$` match at the very end only, not in front of a trailing newline.
            // preg_match returns false on error, which is treated as "no match".
            return preg_match('#^' . $rule . '#D', $path) === 1;
        } catch (\Exception $e) {
            // Best effort: an error handler may have converted a preg_match warning to an exception
        }
        return false;
    }

    /**
     * Generate directive/rule pair
     *
     * Splits the line at the first colon. The directive is lower-cased and
     * has to be present in the (case-insensitive) white list.
     *
     * @param string $line
     * @param string[] $whiteList
     * @return string[]|false Array with the keys `directive` and `value`, false if not supported
     */
    private function generateRulePair($line, array $whiteList)
    {
        $whiteList = array_map('mb_strtolower', $whiteList);
        // Split by directive and rule
        $pair = array_map('trim', explode(':', $line, 2));
        // Check if the line contains a rule. Compared to '' on purpose, the value `0` is valid
        if (
            count($pair) !== 2 ||
            $pair[0] === '' ||
            $pair[1] === ''
        ) {
            return false;
        }
        $directive = mb_strtolower($pair[0]);
        if (!in_array($directive, $whiteList, true)) {
            // Line does not contain any supported directive
            return false;
        }
        return [
            'directive' => $directive,
            'value' => $pair[1],
        ];
    }

    /**
     * Client rate as specified in the `Robot exclusion standard` version 2.0 draft
     * Input format: numDocuments / timeUnit, where the time may be suffixed by
     * a unit starting with `m` (minutes), `h` (hours) or `d` (days). Any other
     * or no unit is handled as seconds.
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
     *
     * @param string $string
     * @return float|int|false Seconds per document, false if the rate is invalid
     */
    private function draftParseRate($string)
    {
        $parts = array_map('trim', explode('/', $string));
        if (count($parts) !== 2) {
            return false;
        }
        $multiplier = 1;
        // The first letter of the time part selects the unit
        switch (mb_substr(mb_strtolower(preg_replace('/[^A-Za-z]/', '', $parts[1])), 0, 1)) {
            case 'm':
                $multiplier = 60;
                break;
            case 'h':
                $multiplier = 3600;
                break;
            case 'd':
                $multiplier = 86400;
                break;
        }
        $num = floatval(preg_replace('/[^0-9]/', '', $parts[0]));
        $sec = floatval(preg_replace('/[^0-9.]/', '', $parts[1])) * $multiplier;
        if ($num <= 0 || $sec <= 0) {
            // Missing or zero values, also prevents division by zero
            return false;
        }
        return $sec / $num;
    }

    /**
     * Client timestamp range as specified in the `Robot exclusion standard` version 2.0 draft
     * Input format: two times separated by `-`. Every non-digit character is
     * stripped, the remaining digits are parsed as `Hi` (hours and minutes).
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.visit-time
     *
     * @param string $string
     * @return string[]|false Array with the keys `from` and `to`, false if the range is invalid
     */
    private function draftParseTime($string)
    {
        $times = preg_replace('/[^0-9]/', '', explode('-', $string));
        if (count($times) !== 2) {
            return false;
        }
        $dtz = new DateTimeZone('UTC');
        $result = [];
        foreach (['from' => $times[0], 'to' => $times[1]] as $key => $time) {
            $parsed = date_create_from_format('Hi', $time, $dtz);
            // Round-trip check: rejects any input which does not format back
            // to itself, such as times rolled over or mis-split by the parser
            if ($parsed === false || date_format($parsed, 'Hi') !== $time) {
                return false;
            }
            $result[$key] = $time;
        }
        return $result;
    }

    /**
     * Validate directive
     *
     * The directive has to be listed exactly as given (strict and case-sensitive
     * comparison), the lower-cased directive is returned.
     *
     * @param string $directive
     * @param string[] $directives
     * @return string
     * @throws ParserException if the directive is not listed
     */
    private function validateDirective($directive, array $directives)
    {
        if (!in_array($directive, $directives, true)) {
            throw new ParserException('Directive not supported by this class');
        }
        return mb_strtolower($directive);
    }
}
181