| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace Jclyons52\PagePreview; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | use Jclyons52\PHPQuery\Document; | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                /**
                 * Reads preview-relevant data (title, meta tags, keywords, image sources)
                 * out of an already-parsed HTML document.
                 */
                class Crawler
                {
                    /**
                     * Parsed page that every query in this class runs against.
                     *
                     * @var Document
                     */
                    private $document;

                    /**
                     * @param Document $document parsed page to read from
                     */
                    public function __construct(Document $document)
                    {
                        $this->document = $document;
                    }

                    /**
                     * Text of the first <title> element.
                     *
                     * @return string the title text, or an empty string when the page has no <title>
                     */
                    public function title()
                    {
                        $titleElement = $this->document->querySelector('title');

                        // The other lookups in this class guard querySelector() for a
                        // missing element; without the same guard here a page lacking
                        // <title> would fail on a method call against a non-object.
                        if (!$titleElement) {
                            return '';
                        }

                        return $titleElement->text();
                    }

                    /**
                     * Keywords listed in <meta name="keywords">, split on commas and trimmed.
                     *
                     * Blank entries (empty content attribute, doubled or trailing commas)
                     * are dropped and the result is reindexed from zero.
                     *
                     * @return string[] keywords in document order; empty when the tag is absent
                     */
                    public function metaKeywords()
                    {
                        $keywordsElement = $this->document->querySelector('meta[name="keywords"]');

                        if (!$keywordsElement) {
                            return [];
                        }

                        $keywords = array_map('trim', explode(',', (string) $keywordsElement->attr('content')));

                        // explode() on an empty string yields [''], so filter on length;
                        // 'strlen' (rather than a truthiness filter) keeps a literal "0" keyword.
                        return array_values(array_filter($keywords, 'strlen'));
                    }

                    /**
                     * Read meta tag content.
                     *
                     * With $element given, returns the content attribute of the first
                     * <meta name="$element"> tag, or null when no such tag exists.
                     * Without it, returns every usable meta tag as a name => content map
                     * (see metaTagsToArray()).
                     *
                     * @param string|null $element value of the name attribute to look up
                     * @return array|string|null
                     */
                    public function meta($element = null)
                    {
                        if ($element === null) {
                            return $this->metaTagsToArray($this->document->querySelectorAll('meta'));
                        }

                        $metaTag = $this->document->querySelector("meta[name='{$element}']");

                        if (!$metaTag) {
                            return null;
                        }

                        return $metaTag->attr('content');
                    }

                    /**
                     * Get the source attributes of all image tags on the page.
                     *
                     * @return string[]
                     */
                    public function images()
                    {
                        $images = $this->document->querySelectorAll('img');

                        // An empty result is compared against a plain array (presumably what
                        // querySelectorAll() returns when nothing matches), which has no attr().
                        if ($images === []) {
                            return [];
                        }

                        return $images->attr('src');
                    }

                    /**
                     * Flatten a set of meta elements into a name => content map.
                     *
                     * The key is the tag's name attribute, falling back to its property
                     * attribute when name is empty. Tags without a key or without content
                     * are skipped; a later tag with the same key overwrites an earlier one.
                     *
                     * @param \Jclyons52\PHPQuery\Support\NodeCollection|array $metaTags
                     * @return array
                     */
                    private function metaTagsToArray($metaTags)
                    {
                        $values = [];

                        foreach ($metaTags as $meta) {
                            $name = $meta->attr('name');
                            if ($name === '') {
                                $name = $meta->attr('property');
                            }

                            $content = $meta->attr('content');

                            // Explicit null / empty-string checks instead of a loose == ''.
                            if ($name === '' || $content === null || $content === '') {
                                continue;
                            }

                            $values[$name] = $content;
                        }

                        return $values;
                    }
                }
            
                                                        
            
                                    
            
            
                | 101 |  |  |  |