| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace Xparse\Parser\Helper; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Class HtmlEncodingConverter
                 *
                 * Re-encodes an HTML document to UTF-8. The source charset is taken from a
                 * Content-Type header value when one is supplied, otherwise from a
                 * <meta ... charset=...> declaration inside the document itself.
                 *
                 * @package Xparse\Parser\Helper
                 */
                class HtmlEncodingConverter
                {

                  /**
                   * Upper-cased names and aliases of every mbstring encoding that may be
                   * used as a conversion source; built lazily on first use.
                   *
                   * @var array|null
                   */
                  private static $supportedEncodings = null;

                  /**
                   * Converts $html to UTF-8 when a supported source charset can be detected.
                   *
                   * The charset declared in $contentType wins; the document's <meta> tag is
                   * consulted only when the header yields nothing. If no charset is found, or
                   * the charset is not in the supported list (UTF-8 itself is deliberately not
                   * in it), the input is returned unchanged.
                   *
                   * @param string $html Raw HTML bytes
                   * @param string|bool $contentType Content-Type header value, or false when unknown
                   * @return string
                   */
                  public static function convertToUtf($html, $contentType = false) {
                    $encoding = '';

                    if (!empty($contentType)) {
                      // Unanchored and case-insensitive so that `Charset="x"` and headers
                      // carrying further parameters after the charset are recognised too.
                      $encoding = self::matchCharset('!charset\s*=\s*["\']?\s*([A-Za-z0-9-]{3,})!i', $contentType);
                    }

                    if ($encoding === '') {
                      // [^>]*? keeps the search inside a single <meta> tag (which may span
                      // several lines); the closing quote is optional so the unquoted
                      // HTML5 form <meta charset=x> is accepted as well.
                      $encoding = self::matchCharset('!<meta\b[^>]*?charset\s*=\s*["\']?\s*([A-Za-z0-9-]{3,})!i', $html);
                    }

                    if ($encoding !== '' and in_array($encoding, self::getSupportedEncodings(), true)) {
                      $html = mb_convert_encoding($html, 'UTF-8', $encoding);
                    }

                    return $html;
                  }


                  /**
                   * Runs $pattern against $subject and returns capture group 1 trimmed and
                   * upper-cased, or an empty string when the pattern does not match.
                   *
                   * @param string $pattern
                   * @param mixed $subject
                   * @return string
                   */
                  private static function matchCharset($pattern, $subject) {
                    if (preg_match($pattern, (string) $subject, $matches) !== 1 or empty($matches[1])) {
                      return '';
                    }

                    return strtoupper(trim($matches[1]));
                  }


                  /**
                   * Returns the upper-cased names and aliases of all mbstring encodings that
                   * are acceptable as a conversion source. The list is computed once and
                   * cached in self::$supportedEncodings.
                   *
                   * @return array
                   */
                  private static function getSupportedEncodings() {
                    if (self::$supportedEncodings !== null) {
                      return self::$supportedEncodings;
                    }

                    // UTF-8 input needs no conversion. The remaining entries are transfer
                    // encodings / pseudo encodings reported by mb_list_encodings(); a page
                    // must not be able to request e.g. base64 decoding of its own body.
                    $skipped = [
                      'UTF-8', 'UTF8',
                      'BASE64', 'UUENCODE', 'HTML-ENTITIES', 'QUOTED-PRINTABLE',
                      '7BIT', '8BIT', 'PASS', 'AUTO',
                    ];

                    $names = [];
                    foreach (mb_list_encodings() as $encoding) {
                      $canonical = strtoupper($encoding);
                      if (in_array($canonical, $skipped, true)) {
                        continue;
                      }

                      // Everything is stored upper-cased because the detected charset is
                      // upper-cased before lookup; mbstring itself reports mixed-case names
                      // such as "Windows-1252" or "cp1252".
                      $names[] = $canonical;

                      $aliases = mb_encoding_aliases($encoding);
                      if (is_array($aliases)) {
                        foreach ($aliases as $alias) {
                          $names[] = strtoupper($alias);
                        }
                      }
                    }

                    self::$supportedEncodings = array_values(array_unique($names));

                    return self::$supportedEncodings;
                  }
                }