| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  * vipnytt/RobotsTxtParser | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * @link https://github.com/VIPnytt/RobotsTxtParser | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | namespace vipnytt\RobotsTxtParser\Parser\Directives; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | use vipnytt\RobotsTxtParser\Client\Directives\RequestRateClient; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | use vipnytt\RobotsTxtParser\Handler\RenderHandler; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | use vipnytt\RobotsTxtParser\RobotsTxtInterface; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  * Class RequestRateParser | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  * @package vipnytt\RobotsTxtParser\Parser\Directives | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | class RequestRateParser implements ParserInterface, RobotsTxtInterface | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     use DirectiveParserTrait; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |      * Base URI | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |      * @var string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     private $base; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |      * Parsed Request-rate entries; each entry is an array with `rate`, `from` and `to` keys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |      * @var array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     private $requestRates = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |      * Sorted | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |      * @var bool | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     private $sorted = false; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |      * Time unit suffixes mapped to their length in seconds | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |      * @var int[] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     private $units = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         'w' => 604800, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         'd' => 86400, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         'h' => 3600, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |         'm' => 60, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     ]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * RequestRateParser constructor.
                 *
                 * Stores the base URI; it is later handed to the RequestRateClient
                 * created by client().
                 *
                 * @param string $base Base URI
                 */
                public function __construct($base)
                {
                    $this->base = $base;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Add a Request-rate line
                 *
                 * The line is split on the first run of whitespace: the first segment is
                 * parsed as the rate, the optional remainder is passed to draftParseTime().
                 * An entry is stored only when the rate is valid; a remainder that fails
                 * to parse is ignored and the entry keeps `from`/`to` as null.
                 *
                 * @param string $line Directive value, e.g. `1/10m` optionally followed by a time span
                 * @return bool True if an entry was stored, false if the rate is invalid
                 */
                public function add($line)
                {
                    $segments = preg_split('/\s+/', $line, 2);

                    $rate = $this->draftParseRate($segments[0]);
                    if ($rate === false) {
                        // Without a valid rate there is nothing worth storing
                        return false;
                    }

                    $entry = [
                        'rate' => $rate,
                        'from' => null,
                        'to' => null,
                    ];
                    if (!empty($segments[1])) {
                        $times = $this->draftParseTime($segments[1]);
                        if ($times !== false) {
                            // Parsed values override the null defaults above
                            $entry = array_merge($entry, $times);
                        }
                    }

                    $this->requestRates[] = $entry;
                    return true;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Client rate as specified in the `Robot exclusion standard` version 2.0 draft
                 * rate = numDocuments / timeUnit
                 * @link http://www.conman.org/people/spc/robots2.html#format.directives.request-rate
                 *
                 * The time part may carry a unit suffix: the first letter found in it is
                 * looked up (case-insensitively) in $this->units; a missing or unknown
                 * letter leaves the value unscaled. The returned value is
                 * time * multiplier / documents.
                 *
                 * False is returned when the string does not consist of exactly two
                 * `/`-separated parts, when the document count is zero or not a number,
                 * when the time part holds no usable number, or when the resulting rate
                 * is not positive.
                 *
                 * @param string $string Raw rate, e.g. `1/10m`
                 * @return float|int|false Positive rate, or false if the value is invalid
                 */
                private function draftParseRate($string)
                {
                    $parts = array_map('trim', explode('/', $string));
                    if (count($parts) !== 2) {
                        return false;
                    }
                    list($documentPart, $timePart) = $parts;

                    // strip_tags() keeps the tag stripping formerly done by FILTER_SANITIZE_STRING,
                    // which is deprecated as of PHP 8.1.
                    $letters = preg_replace('/[^A-Za-z]/', '', strip_tags($timePart));
                    $unit = strtolower(substr($letters, 0, 1));
                    $multiplier = isset($this->units[$unit]) ? $this->units[$unit] : 1;

                    // The cast turns an empty or malformed sanitised value into 0 instead of
                    // handing a non-numeric string to abs().
                    $documents = abs((int)filter_var($documentPart, FILTER_SANITIZE_NUMBER_INT));
                    $time = filter_var($timePart, FILTER_SANITIZE_NUMBER_FLOAT, FILTER_FLAG_ALLOW_FRACTION);
                    if ($documents === 0 || !is_numeric($time)) {
                        // Guards the division below against a zero divisor and non-numeric operands
                        return false;
                    }

                    // `+ 0` converts the numeric string to int or float as appropriate
                    $rate = abs($time + 0) * $multiplier / $documents;
                    return $rate > 0 ? $rate : false;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Client
                 *
                 * Calls sort() and then wraps the collected request rates, together with
                 * the base URI, in a RequestRateClient.
                 *
                 * @param string $userAgent User-agent passed on to the client
                 * @param float|int $fallbackValue Fallback value passed on to the client
                 * @return RequestRateClient
                 */
                public function client($userAgent = self::USER_AGENT, $fallbackValue = 0)
                {
                    // sort() must run before the rates are read for the client
                    $this->sort();
                    $client = new RequestRateClient(
                        $this->base,
                        $userAgent,
                        $this->requestRates,
                        $fallbackValue
                    );
                    return $client;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |      * Sort | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |      * @return bool | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |     private function sort() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |         if (!$this->sorted) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |             $this->sorted = true; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |             return usort($this->requestRates, function (array $requestRateA, array $requestRateB) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |                 // PHP 7: Switch to the <=> "Spaceship" operator | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |                 return $requestRateB['rate'] > $requestRateA['rate']; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |             }); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |         return $this->sorted; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |      * Render | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |      * @param RenderHandler $handler | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |      * @return bool | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |     public function render(RenderHandler $handler) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |         $this->sort(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |         foreach ($this->requestRates as $array) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |             $multiplyFactor = $this->decimalMultiplier($array['rate']); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |             $multipliedRate = $array['rate'] * $multiplyFactor; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |             $gcd = $this->getGCD($multiplyFactor, $multipliedRate); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |             $requests = $multiplyFactor / $gcd; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |             $time = $multipliedRate / $gcd; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |             $suffix = 's'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |             foreach ($this->units as $unit => $sec) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |                 if ($time % $sec === 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |                     $suffix = $unit; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |                     $time /= $sec; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |             if (isset($array['from']) && | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |                 isset($array['to']) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |             ) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |                 $suffix .= ' ' . $array['from'] . '-' . $array['to']; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |             $handler->add(self::DIRECTIVE_REQUEST_RATE, $requests . '/' . $time . $suffix); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |         return true; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |      * @param int|float $value | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |      * @return int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |     private function decimalMultiplier($value) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |         $multiplier = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |         while (fmod($value, 1) != 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |             $value *= 10; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |             $multiplier *= 10; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |         return $multiplier; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |      * Returns the greatest common divisor of two integers using the Euclidean algorithm. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |      * @param int $a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |      * @param int $b | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |      * @return int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |     private function getGCD($a, $b) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |         if (extension_loaded('gmp')) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |             return gmp_intval(gmp_gcd((string)$a, (string)$b)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |         $large = $a > $b ? $a : $b; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |         $small = $a > $b ? $b : $a; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |         $remainder = $large % $small; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |         return 0 === $remainder ? $small : $this->getGCD($small, $remainder); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |     } | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 202 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 203 |  |  |  |