Completed
Push — master ( e69112...9db678 )
by Jan-Petter
04:16
created

DirectiveParserCommons::generateRulePair()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 19
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 1
Metric Value
c 3
b 0
f 1
dl 0
loc 19
rs 9.2
cc 4
eloc 11
nc 2
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser\Directives;
3
4
use DateTimeZone;
5
use vipnytt\RobotsTxtParser\Exceptions\ParserException;
6
7
/**
8
 * Class DirectiveParserCommons
9
 *
10
 * @package vipnytt\RobotsTxtParser\Parser\Directives
11
 */
12
trait DirectiveParserCommons
{
    /**
     * Generate directive/rule pair
     *
     * Splits the line at its first colon, trims both halves and lower-cases the
     * directive. The pair is returned only when both halves are non-empty and the
     * directive is present in the white list (compared case-insensitively).
     *
     * @param string $line Raw line, e.g. `Crawl-delay: 5`
     * @param string[] $whiteList Directive names accepted by the caller
     * @return string[]|false `['directive' => ..., 'value' => ...]`, or false when the
     *                        line holds no supported directive
     */
    private function generateRulePair($line, array $whiteList)
    {
        $whiteList = array_map('mb_strtolower', $whiteList);
        // Split at the first colon only, so values that contain colons (URLs) stay intact.
        // explode() is byte-safe for an ASCII delimiter and does not depend on the
        // mbregex encoding setting.
        $pair = array_map('trim', explode(':', $line, 2));
        // Compare against '' instead of using empty(): the value "0" is legitimate.
        if (count($pair) !== 2 || $pair[0] === '' || $pair[1] === '') {
            // Line does not contain both a directive and a rule
            return false;
        }
        $directive = mb_strtolower($pair[0]);
        if (!in_array($directive, $whiteList, true)) {
            // Line does not contain any supported directive
            return false;
        }
        return [
            'directive' => $directive,
            'value' => $pair[1],
        ];
    }

    /**
     * Client rate as specified in the `Robot exclusion standard` version 2.0 draft
     * Input format: numDocuments / timeUnit, e.g. `10/1m`
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
     *
     * The returned value is the number of seconds per document, i.e. the time
     * span converted to seconds divided by the document count. The time unit is
     * taken from the first letter after the slash: `m` (x60), `h` (x3600),
     * `d` (x86400); anything else is treated as seconds.
     *
     * @param string $string
     * @return float|false Seconds per document, or false if the rate is malformed or not positive
     */
    private function draftParseRate($string)
    {
        $parts = array_map('trim', explode('/', $string));
        if (count($parts) !== 2) {
            return false;
        }
        $multiplier = 1;
        switch (mb_substr(mb_strtolower(preg_replace('/[^A-Za-z]/', '', $parts[1])), 0, 1)) {
            case 'm':
                $multiplier = 60;
                break;
            case 'h':
                $multiplier = 3600;
                break;
            case 'd':
                $multiplier = 86400;
                break;
        }
        $num = floatval(preg_replace('/[^0-9]/', '', $parts[0]));
        if ($num <= 0) {
            // No (or zero) document count: bail out before dividing by zero.
            return false;
        }
        $sec = floatval(preg_replace('/[^0-9.]/', '', $parts[1])) * $multiplier;
        $rate = $sec / $num;
        return $rate > 0 ? $rate : false;
    }

    /**
     * Client timestamp range as specified in the `Robot exclusion standard` version 2.0 draft
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.visit-time
     *
     * Expects two times separated by a dash, e.g. `0900-1700`. Every non-digit
     * character is stripped from each half before it is parsed as `Hi` in UTC.
     *
     * @param string $string
     * @return string[]|false `['from' => 'HHMM', 'to' => 'HHMM']`, or false if the range is invalid
     */
    private function draftParseTime($string)
    {
        $array = preg_replace('/[^0-9]/', '', explode('-', $string));
        if (count($array) !== 2) {
            return false;
        }
        $dtz = new DateTimeZone('UTC');
        $fromTime = date_create_from_format('Hi', $array[0], $dtz);
        $toTime = date_create_from_format('Hi', $array[1], $dtz);
        if ($fromTime === false || $toTime === false) {
            return false;
        }
        $range = [
            'from' => date_format($fromTime, 'Hi'),
            'to' => date_format($toTime, 'Hi'),
        ];
        // Out-of-range input (e.g. "2500") is parsed with a silent roll-over instead of
        // failing; a valid HHMM value always formats back to exactly what was parsed.
        if ($range['from'] !== $array[0] || $range['to'] !== $array[1]) {
            return false;
        }
        return $range;
    }

    /**
     * Validate directive
     *
     * The directive must match one of the supported directives exactly (strict,
     * case-sensitive comparison); the lower-cased directive is returned.
     *
     * @param string $directive
     * @param string[] $directives Supported directives
     * @return string Lower-cased directive
     * @throws ParserException If the directive is not in the supported list
     */
    private function validateDirective($directive, array $directives)
    {
        if (!in_array($directive, $directives, true)) {
            throw new ParserException('Directive not supported by this class');
        }
        return mb_strtolower($directive);
    }
}
112