| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace PiedWeb\UrlHarvester; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | use InvalidArgumentException; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Reads the `X-Robots-Tag` HTTP response headers and answers whether a given
                 * crawler may index the page and/or follow its links.
                 *
                 * Waiting validation for PR:
                 * https://github.com/spatie/robots-txt/pull/21.
                 */
                class RobotsHeaders
                {
                    /**
                     * Parsed directives, keyed by lower-cased user agent ('*' holds the rules
                     * that were sent without a user agent).
                     *
                     * @var array<string, array{noindex: bool, nofollow: bool}>
                     */
                    protected $robotHeadersProperties = [];

                    /**
                     * Fetches $source and builds an instance from the response headers.
                     *
                     * @param string $source URL (or any path readable by file_get_contents)
                     *
                     * @throws InvalidArgumentException when the source cannot be read
                     */
                    public static function readFrom(string $source): self
                    {
                        // "@" silences the native warning: failure is reported by the exception below.
                        $content = @file_get_contents($source);

                        if (false === $content) {
                            throw new InvalidArgumentException("Could not read from source `{$source}`");
                        }

                        // $http_response_header is filled in the local scope by PHP's HTTP wrapper;
                        // the fallback covers the case where it has not been set.
                        return new self($http_response_header ?? []);
                    }

                    /**
                     * Named constructor, equivalent to `new RobotsHeaders($headers)`.
                     *
                     * @param array $headers see __construct()
                     */
                    public static function create(array $headers): self
                    {
                        return new self($headers);
                    }

                    /**
                     * @param array $headers response headers, either as a list of raw lines
                     *                       (`['X-Robots-Tag: noindex']`) or keyed by header name
                     *                       (`['X-Robots-Tag' => 'noindex']`, value may be an array)
                     */
                    public function __construct(array $headers)
                    {
                        $this->robotHeadersProperties = $this->parseHeaders($headers);
                    }

                    /**
                     * True unless indexing is forbidden for $userAgent.
                     */
                    public function mayIndex(string $userAgent = '*'): bool
                    {
                        return !$this->noindex($userAgent);
                    }

                    /**
                     * True unless following links is forbidden for $userAgent.
                     */
                    public function mayFollow(string $userAgent = '*'): bool
                    {
                        return !$this->nofollow($userAgent);
                    }

                    /**
                     * Whether the headers forbid indexing for $userAgent.
                     */
                    public function noindex(string $userAgent = '*'): bool
                    {
                        return $this->hasDirective('noindex', $userAgent);
                    }

                    /**
                     * Whether the headers forbid following links for $userAgent.
                     */
                    public function nofollow(string $userAgent = '*'): bool
                    {
                        return $this->hasDirective('nofollow', $userAgent);
                    }

                    /**
                     * Builds the per-user-agent directive map from the raw headers.
                     *
                     * Every X-Robots-Tag value is parsed on its own and the results for one user
                     * agent are OR-ed together, so a later header never erases a restriction set
                     * by an earlier one.
                     *
                     * @return array<string, array{noindex: bool, nofollow: bool}>
                     */
                    protected function parseHeaders(array $headers): array
                    {
                        $parsedHeaders = [];

                        foreach ($this->filterRobotHeaders($headers) as $values) {
                            // A header keyed by name may hold several values: handle them one by one
                            // instead of gluing them together, which would mix their user agents.
                            foreach ((array) $values as $value) {
                                list($userAgent, $options) = $this->splitUserAgent((string) $value);

                                $directives = $this->parseDirectives($options);
                                $known = $parsedHeaders[$userAgent] ?? ['noindex' => false, 'nofollow' => false];

                                $parsedHeaders[$userAgent] = [
                                    'noindex' => $known['noindex'] || $directives['noindex'],
                                    'nofollow' => $known['nofollow'] || $directives['nofollow'],
                                ];
                            }
                        }

                        return $parsedHeaders;
                    }

                    /**
                     * Keeps the entries that are X-Robots-Tag headers, whether the header name is
                     * the array key or the beginning of the value. Keys are preserved.
                     */
                    protected function filterRobotHeaders(array $headers): array
                    {
                        return array_filter($headers, function ($header) use ($headers) {
                            $headerContent = $this->normalizeHeaders($headers[$header] ?? []);

                            // $header is an int for list-style headers, hence the cast.
                            return 0 === stripos((string) $header, 'x-robots-tag')
                                || 0 === stripos($headerContent, 'x-robots-tag');
                        }, ARRAY_FILTER_USE_KEY);
                    }

                    /**
                     * Flattens a header value (string or array of strings) into one
                     * comma-separated string.
                     */
                    protected function normalizeHeaders($headers): string
                    {
                        return implode(',', (array) $headers);
                    }

                    /**
                     * Looks a directive up for $userAgent (case-insensitively), then for '*'.
                     *
                     * A rule set addressed to the user agent takes precedence over the '*' one,
                     * even when it does not set the directive.
                     */
                    private function hasDirective(string $directive, string $userAgent): bool
                    {
                        $userAgent = strtolower(trim($userAgent));

                        return
                            // 1. We check for the suggested user-agent
                            $this->robotHeadersProperties[$userAgent][$directive]
                            // 2. We check for all user-agent
                            ?? $this->robotHeadersProperties['*'][$directive]
                            // 3. The directive doesn't exist, so return false
                            ?? false;
                    }

                    /**
                     * Splits one header value into its user agent and its directive list.
                     *
                     * @return array{0: string, 1: string} [lower-cased user agent or '*', directives]
                     */
                    private function splitUserAgent(string $value): array
                    {
                        // Directives that carry their own "name: value" pair; a leading one of
                        // these must not be mistaken for a user agent.
                        $valuedDirectives = ['unavailable_after', 'max-snippet', 'max-image-preview', 'max-video-preview'];

                        // List-style entries still start with the header name: drop it.
                        $value = trim((string) preg_replace('/^\s*x-robots-tag\s*:/i', '', $value));

                        // "<user agent>: <directives>" -- the prefix may contain neither ':' nor ','.
                        if (1 === preg_match('/^([^:,]+?)\s*:(.*)$/s', $value, $matches)) {
                            $candidate = strtolower(trim($matches[1]));

                            if (!in_array($candidate, $valuedDirectives, true)) {
                                return [$candidate, $matches[2]];
                            }
                        }

                        return ['*', $value];
                    }

                    /**
                     * Extracts the noindex / nofollow flags from a directive list.
                     *
                     * `none` is treated as `noindex, nofollow`.
                     *
                     * @return array{noindex: bool, nofollow: bool}
                     */
                    private function parseDirectives(string $options): array
                    {
                        $options = strtolower($options);

                        // Whole-word match so that it cannot fire inside another token.
                        $none = 1 === preg_match('/(?:^|[\s,])none(?:[\s,]|$)/', $options);

                        return [
                            'noindex' => $none || false !== strpos($options, 'noindex'),
                            'nofollow' => $none || false !== strpos($options, 'nofollow'),
                        ];
                    }
                }
            
                                                        
            
                                    
            
            
                | 104 |  |  |  |