| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |   namespace Xparse\Parser\Helper; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |   use GuzzleHttp\Psr7\Response; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |   use Xparse\ElementFinder\ElementFinder; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |   use Xparse\ElementFinder\Helper; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Factory that builds an ElementFinder from an HTTP response.
                 *
                 * - Detects the document charset (Content-Type header first, <meta> tag as a fallback)
                 *   and converts the body to UTF-8 when mbstring knows that charset.
                 * - Converts relative links to absolute ones when the page url is supplied.
                 */
                class ElementFinderFactory {

                  /**
                   * Upper-cased names and aliases of every non UTF-8 encoding reported by mbstring.
                   * Stays null until getSupportedEncodings() fills it on first use.
                   *
                   * @var array|null
                   */
                  private static $supportedEncodings = null;


                  /**
                   * Create an ElementFinder for the body of the given response.
                   *
                   * @param Response $response Response whose body is parsed
                   * @param string $affectedUrl Url of the page; when non-empty it is passed to
                   *                            LinkConverter::convertUrlsToAbsolute() together with the page
                   * @return ElementFinder
                   */
                  public static function create(Response $response, $affectedUrl = '') {
                    $html = Helper::safeEncodeStr((string) $response->getBody());

                    $encoding = self::detectEncoding($response->getHeaderLine('content-type'), $html);

                    // UTF-8 (and unknown charsets) are not in the supported list, so such bodies are left untouched.
                    if ($encoding !== '' && in_array($encoding, self::getSupportedEncodings(), true)) {
                      $html = mb_convert_encoding($html, 'UTF-8', $encoding);
                    }

                    $page = new ElementFinder((string) $html);

                    if ($affectedUrl) {
                      LinkConverter::convertUrlsToAbsolute($page, $affectedUrl);
                    }

                    return $page;
                  }


                  /**
                   * Find the charset declared for the document.
                   *
                   * The Content-Type header wins; the <meta> tag is consulted only when the header
                   * is missing or carries no charset parameter.
                   *
                   * @param string $contentType Value of the Content-Type header, may be empty
                   * @param string $html Document body
                   * @return string Upper-cased charset name or an empty string when none was found
                   */
                  private static function detectEncoding($contentType, $html) {
                    // Parameter name is matched case-insensitively, the value may be quoted and may be
                    // followed by other parameters (e.g. `text/html; Charset="koi8-r"; foo=bar`).
                    if ($contentType && preg_match('!charset\s*=\s*["\']?([A-Za-z0-9-]{4,})!i', $contentType, $headerMatch)) {
                      return strtoupper($headerMatch[1]);
                    }

                    if (preg_match("!.*<meta.*charset=\"?([A-Za-z0-9-]{4,})\"!mi", $html, $metaMatch)) {
                      return strtoupper($metaMatch[1]);
                    }

                    return '';
                  }


                  /**
                   * List every encoding (with aliases) that can be converted to UTF-8.
                   *
                   * Names are upper-cased because detected charsets are upper-cased before the lookup,
                   * while mbstring reports mixed-case names such as "latin1" or "SJIS-win".
                   * The result is cached in self::$supportedEncodings.
                   *
                   * @return array
                   */
                  private static function getSupportedEncodings() {
                    if (self::$supportedEncodings !== null) {
                      return self::$supportedEncodings;
                    }

                    $encodings = [];
                    foreach (mb_list_encodings() as $encoding) {
                      // The target encoding itself is skipped, so UTF-8 documents are never re-converted.
                      if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
                        continue;
                      }

                      $encodings[] = $encoding;
                      $encodings = array_merge($encodings, mb_encoding_aliases($encoding));
                    }

                    self::$supportedEncodings = array_values(array_unique(array_map('strtoupper', $encodings)));

                    return self::$supportedEncodings;
                  }

                }