Completed
Push — master ( 0fbb6b...23ae93 )
by Jan-Petter
02:52
created

DirectiveParserCommons::validateDirective()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 7
rs 9.4285
cc 2
eloc 4
nc 2
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser\Directives;
3
4
use DateTimeZone;
5
6
/**
 * Trait DirectiveParserCommons
 *
 * Parsing helpers shared by directive parsers: splitting a line into a
 * directive/value pair, and parsing the rate and time-range value formats.
 *
 * generateRulePair() reads self::ALIAS_DIRECTIVES, so the class using this
 * trait must define that constant as an array of alias => directive name.
 *
 * @package vipnytt\RobotsTxtParser\Parser\Directives
 */
trait DirectiveParserCommons
{
    /**
     * Generate directive/rule pair
     *
     * Splits the line at its first colon, lower-cases the directive part,
     * substitutes any alias listed in self::ALIAS_DIRECTIVES and accepts the
     * result only if it is one of the white-listed directives.
     *
     * @param string $line
     * @param string[] $whiteList Accepted directive names (compared case-insensitively)
     * @return string[]|false Array with the keys `directive` and `value`, or false
     *                        if either part is empty or the directive is not white-listed
     */
    private function generateRulePair($line, array $whiteList)
    {
        $whiteList = array_map('mb_strtolower', $whiteList);
        // A limit of 2 keeps any further colons (e.g. in URLs) inside the value
        $pair = array_map('trim', mb_split(':', $line, 2));
        // empty() also covers a missing colon; note that it treats the string '0' as empty too
        if (empty($pair[0]) || empty($pair[1])) {
            return false;
        }
        // Alias keys are replaced by their values wherever they occur in the directive
        $directive = str_ireplace(
            array_keys(self::ALIAS_DIRECTIVES),
            array_values(self::ALIAS_DIRECTIVES),
            mb_strtolower($pair[0])
        );
        // Strict comparison, so numeric-looking strings such as '1e3' and '1000' never match each other
        if (!in_array($directive, $whiteList, true)) {
            // Line does not contain any supported directive
            return false;
        }
        return [
            'directive' => $directive,
            'value' => $pair[1],
        ];
    }

    /**
     * Client rate as specified in the `Robot exclusion standard` version 2.0 draft
     *
     * The input has the form `numDocuments/timeUnit`. The returned value is the
     * number of seconds per document: rate = timeUnit (in seconds) / numDocuments
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
     *
     * @param string $string
     * @return float|int|false Seconds per document, or false if the input is malformed,
     *                         or if the document count or the time span is zero
     */
    private function draftParseRate($string)
    {
        $parts = array_map('trim', mb_split('/', $string));
        if (count($parts) !== 2) {
            return false;
        }
        // The first letter of the time part selects the unit; anything else counts as seconds
        $unitSeconds = [
            'm' => 60,
            'h' => 3600,
            'd' => 86400,
        ];
        $unit = mb_substr(mb_strtolower(preg_replace('/[^A-Za-z]/', '', $parts[1])), 0, 1);
        $multiplier = isset($unitSeconds[$unit]) ? $unitSeconds[$unit] : 1;
        $num = floatval(preg_replace('/[^0-9]/', '', $parts[0]));
        if ($num <= 0) {
            // No usable document count; also prevents a division by zero below
            return false;
        }
        $sec = floatval(preg_replace('/[^0-9.]/', '', $parts[1])) * $multiplier;
        $rate = $sec / $num;
        return $rate > 0 ? $rate : false;
    }

    /**
     * Client timestamp range as specified in the `Robot exclusion standard` version 2.0 draft
     *
     * The input has the form `HHMM-HHMM`; every non-digit character around the
     * dash is ignored and both times are parsed as UTC.
     * @link http://www.conman.org/people/spc/robots2.html#format.directives.visit-time
     *
     * @param string $string
     * @return string[]|false Array with the keys `from` and `to` (both formatted as `Hi`),
     *                        or false if the input is not two parsable times
     */
    private function draftParseTime($string)
    {
        // preg_replace() accepts an array subject and strips the non-digits from each part
        $array = preg_replace('/[^0-9]/', '', mb_split('-', $string));
        if (count($array) !== 2) {
            return false;
        }
        $dtz = new DateTimeZone('UTC');
        // NOTE(review): date_create_from_format() appears to roll over out-of-range
        // values (e.g. hour 25) rather than fail - confirm whether stricter validation is wanted
        $fromTime = date_create_from_format('Hi', $array[0], $dtz);
        if ($fromTime === false) {
            return false;
        }
        $toTime = date_create_from_format('Hi', $array[1], $dtz);
        if ($toTime === false) {
            return false;
        }
        return [
            'from' => date_format($fromTime, 'Hi'),
            'to' => date_format($toTime, 'Hi'),
        ];
    }
}
95