| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace Bee4\RobotsTxt; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | use Bee4\RobotsTxt\Exception\InvalidContentException; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Class Parser
                 * Take the content of a robots.txt file and transform it to rules
                 *
                 * Directive names (`User-Agent`, `Allow`, `Disallow`, `Sitemap`) are matched
                 * case-insensitively. Whitespace before the name, around the colon and
                 * around the value is ignored.
                 *
                 * @copyright Bee4 2015
                 * @author    Stephane HULARD <[email protected]>
                 */
                class Parser
                {
                    /**
                     * Parse the given content and build the matching Rules collection
                     *
                     * Consecutive `User-Agent` lines form one group: every `Allow` /
                     * `Disallow` line that follows is applied to each Rule of the group.
                     * A new group starts at the first `User-Agent` line met after any other
                     * non-comment line.
                     *
                     * @param  string|Content $content
                     * @return Rules
                     * @throws InvalidContentException When $content is neither a string
                     *                                 nor a Content instance
                     */
                    public function analyze($content)
                    {
                        $content = $this->handleContent($content);

                        $rules = new Rules();
                        $current = [];
                        // True while the last non-comment line was a User-Agent line
                        $ua = false;

                        foreach ($content as $line) {
                            // Comment lines may be indented; skipping them here keeps them
                            // from resetting the User-Agent grouping flag below.
                            if (0 === strpos(ltrim($line), '#')) {
                                continue;
                            }

                            $agent = $this->directiveValue('User-Agent', $line);
                            if (null === $agent) {
                                $ua = false;
                                $this->parseLine($current, $line, $rules);
                                continue;
                            }

                            if (!$ua) {
                                // First User-Agent of a new group: flush the previous one
                                $this->populateRules($rules, $current);
                                $current = [];
                            }
                            $current[] = new Rule($agent);
                            $ua = true;
                        }

                        // Flush the last group
                        $this->populateRules($rules, $current);
                        return $rules;
                    }

                    /**
                     * Handle content to build a valid instance
                     *
                     * @param  string|Content $content
                     * @return Content
                     * @throws InvalidContentException When $content is neither a string
                     *                                 nor a Content instance
                     */
                    private function handleContent($content)
                    {
                        if (is_string($content)) {
                            $content = new Content($content);
                        }
                        if (!($content instanceof Content)) {
                            // Build, decorate, then throw: the exception itself is thrown
                            // whatever setContent() returns.
                            $exception = new InvalidContentException(
                                'Content must be a `string` or a `Content` instance'
                            );
                            $exception->setContent($content);
                            throw $exception;
                        }

                        return $content;
                    }

                    /**
                     * Transform file content to structured Rules
                     *
                     * @param  string|Content $content
                     * @return Rules
                     * @throws InvalidContentException When $content is neither a string
                     *                                 nor a Content instance
                     */
                    public static function parse($content)
                    {
                        $parser = new self();
                        return $parser->analyze($content);
                    }

                    /**
                     * Extract the value of a `<name>: <value>` directive line
                     *
                     * @param  string $name Directive name, matched case-insensitively
                     * @param  string $line Raw line to inspect
                     * @return string|null  Trimmed value (possibly empty), or null when the
                     *                      line is not a $name directive
                     */
                    private function directiveValue($name, $line)
                    {
                        $pattern = '/^\s*' . preg_quote($name, '/') . '\s*:(.*)$/i';
                        if (1 !== preg_match($pattern, $line, $matches)) {
                            return null;
                        }

                        return trim($matches[1]);
                    }

                    /**
                     * Parse a line of data which is not a User-Agent line
                     *
                     * `Allow` / `Disallow` values are applied to every Rule of the current
                     * group, `Sitemap` values are registered on the Rules collection.
                     * Directives with an empty value and unrecognised lines are ignored.
                     *
                     * @param  array  $current Rule objects of the current User-Agent group
                     * @param  string $line
                     * @param  Rules  $rules
                     */
                    private function parseLine(array $current, $line, Rules $rules)
                    {
                        foreach (['allow', 'disallow'] as $method) {
                            $pattern = $this->directiveValue($method, $line);
                            if (null === $pattern) {
                                continue;
                            }
                            // An empty pattern restricts nothing: do not forward it
                            if ('' !== $pattern) {
                                $this->apply($current, $method, $pattern);
                            }
                            return;
                        }

                        $sitemap = $this->directiveValue('Sitemap', $line);
                        if (null !== $sitemap && '' !== $sitemap) {
                            $rules->addSitemap($sitemap);
                        }
                    }

                    /**
                     * Call a method with the given parameter on every element of an array
                     *
                     * @param  array  $data   Objects on which the method is called
                     * @param  string $method Name of the method to call
                     * @param  string $param  Single argument given to the method
                     */
                    private function apply(array $data, $method, $param)
                    {
                        foreach ($data as $item) {
                            $item->$method($param);
                        }
                    }

                    /**
                     * Add every given Rule instance to the Rules collection
                     *
                     * @param  Rules  $rules
                     * @param  array  $current Collection of Rule objects
                     */
                    private function populateRules(Rules $rules, array $current)
                    {
                        foreach ($current as $item) {
                            $rules->add($item);
                        }
                    }
                }
            
                                                        
            
                                    
            
            
                | 120 |  |  |  |