| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |   namespace Xparse\Parser\Helper; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |   /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |    * Class HtmlEncodingConverter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |    * @package Xparse\Parser\Helper | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |    */ | 
            
                                                                                                            
                            
            
                                    
            
            
                class HtmlEncodingConverter {

                  /**
                   * Lower-cased names and aliases of the mbstring encodings that convertToUtf()
                   * is willing to convert from. Built on first use by getSupportedEncodings();
                   * null until then.
                   *
                   * @var array|null
                   */
                  private static $supportedEncodings = null;


                  /**
                   * Convert an HTML document to UTF-8.
                   *
                   * The source encoding is read from the `charset` parameter of $contentType when
                   * one is present, otherwise from the first <meta> tag that declares a charset.
                   * The document is returned unchanged when no charset is declared, or when the
                   * declared charset is not in the list returned by getSupportedEncodings()
                   * (that list omits UTF-8, so input declared as UTF-8 is never re-encoded).
                   *
                   * @param string $html Raw document bytes
                   * @param string|bool $contentType Content-Type header value; false or empty when unknown
                   * @return string
                   */
                  public static function convertToUtf($html, $contentType = false) {
                    $encoding = null;

                    if (!empty($contentType)) {
                      // Header parameter names are case-insensitive, the value may be quoted,
                      // and further parameters may follow it, so the match is not anchored.
                      $encoding = self::extractCharset(
                        '!\bcharset\s*=\s*["\']?\s*([A-Za-z0-9_-]{2,})!i',
                        $contentType
                      );
                    }

                    if ($encoding === null) {
                      // [^>]*? keeps the match inside a single <meta ...> tag, so a "charset="
                      // that appears later on the same line (e.g. in a link) is not picked up.
                      // No closing quote is required: <meta charset=windows-1251> is valid HTML5.
                      // The "u" modifier is deliberately absent: $html is not UTF-8 yet.
                      $encoding = self::extractCharset(
                        '!<meta\b[^>]*?\bcharset\s*=\s*["\']?\s*([A-Za-z0-9_-]{2,})!i',
                        $html
                      );
                    }

                    if ($encoding === null) {
                      return $html;
                    }

                    if (in_array($encoding, self::getSupportedEncodings(), true)) {
                      $html = mb_convert_encoding($html, 'utf-8', $encoding);
                    }

                    return $html;
                  }


                  /**
                   * Run $pattern against $subject and return its first capture group lower-cased.
                   *
                   * @param string $pattern PCRE pattern whose first capture group is the charset label
                   * @param string $subject Text to search
                   * @return string|null Lower-cased charset label, or null when the pattern does not match
                   */
                  private static function extractCharset($pattern, $subject) {
                    if (preg_match($pattern, $subject, $match) !== 1) {
                      return null;
                    }

                    return strtolower($match[1]);
                  }


                  /**
                   * Build (once) and return the lower-cased list of encodings accepted as a
                   * conversion source: every name from mb_list_encodings() plus its aliases,
                   * except UTF-8 and mbstring's transfer/entity encodings.
                   *
                   * @return array
                   */
                  private static function getSupportedEncodings() {
                    if (self::$supportedEncodings !== null) {
                      return self::$supportedEncodings;
                    }

                    // UTF-8 is the target, so converting from it is pointless. The others are
                    // byte/entity transforms rather than character sets; a page must not be able
                    // to trigger them by declaring e.g. charset="base64".
                    $excluded = ['utf-8', 'utf8', 'base64', 'uuencode', 'html-entities', 'quoted-printable'];

                    $hasAliasesFunction = function_exists('mb_encoding_aliases');
                    $encodings = [];
                    foreach (mb_list_encodings() as $encoding) {
                      $encoding = strtolower($encoding);
                      if (in_array($encoding, $excluded, true)) {
                        // Skipping here also skips the aliases of the excluded encoding.
                        continue;
                      }

                      $encodings[] = $encoding;
                      if ($hasAliasesFunction) {
                        foreach (mb_encoding_aliases($encoding) as $encodingAlias) {
                          $encodings[] = strtolower($encodingAlias);
                        }
                      }
                    }

                    // Publish the cache only after the list is complete.
                    self::$supportedEncodings = $encodings;

                    return self::$supportedEncodings;
                  }

                }