| Total Complexity | 493 |
| Total Lines | 719 |
| Duplicated Lines | 0 % |
| Changes | 3 | ||
| Bugs | 0 | Features | 0 |
Complex classes like htmLawed often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use htmLawed, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 13 | class htmLawed{ |
||
| 14 | // begin class |
||
/**
 * Main entry point: filter the HTML/XHTML text in $t according to config $C and spec $S.
 *
 * The method normalises every recognised $C key to a concrete value, publishes the
 * finished $C and $S through $GLOBALS['C'] / $GLOBALS['S'] for the callbacks it invokes
 * (restoring any previous values before returning), strips low control characters,
 * optionally maps MS "smart" characters, protects comments/CDATA sections, normalises
 * character entities, filters tags, and finally balances and tidies the markup when
 * those options are enabled.
 *
 * @param string       $t Input text.
 * @param array|mixed  $C Configuration array; any non-array value means "all defaults".
 * @param array|string $S Element/attribute spec; a string is parsed with hl_spec().
 * @return string Filtered text.
 */
public static function hl($t, $C=1, $S=array()){
  $C = is_array($C) ? $C : array();
  if(!empty($C['valid_xhtml'])){
    $C['elements'] = empty($C['elements']) ? '*-acronym-big-center-dir-font-isindex-s-strike-tt' : $C['elements'];
    $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
    $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
  }
  // config eles
  $e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'article'=>1, 'aside'=>1, 'audio'=>1, 'b'=>1, 'bdi'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'canvas'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'command'=>1, 'data'=>1, 'datalist'=>1, 'dd'=>1, 'del'=>1, 'details'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'figcaption'=>1, 'figure'=>1, 'font'=>1, 'footer'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'header'=>1, 'hgroup'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'keygen'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'link'=>1, 'main'=>1, 'map'=>1, 'mark'=>1, 'menu'=>1, 'meta'=>1, 'meter'=>1, 'nav'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'output'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'progress'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'section'=>1, 'select'=>1, 'small'=>1, 'source'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'style'=>1, 'sub'=>1, 'summary'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'time'=>1, 'tr'=>1, 'track'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1, 'video'=>1, 'wbr'=>1); // 118 incl. deprecated & some Ruby

  // 'safe' mode removes the active/embedding elements from the default set
  if(!empty($C['safe'])){
    unset($e['applet'], $e['audio'], $e['canvas'], $e['embed'], $e['iframe'], $e['object'], $e['script'], $e['video']);
  }
  // 'elements' is either '-*' (none), a comma list (exactly these), or '*' with +name/-name adjustments
  $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
  if($x == '-*'){$e = array();}
  elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));}
  else{
    if(isset($x[1])){
      preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
      for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];}
      foreach($m as $v){
        if($v[0] == '+'){$e[substr($v, 1)] = 1;}
        // a '-name' is ignored when the same name is also listed with '+'
        if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);}
      }
    }
  }
  $C['elements'] =& $e;
  // config attrs
  $x = !empty($C['deny_attribute']) ? strtolower(str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute'])) : '';
  // '*-a-b' form: split on '-' while shielding the hyphen inside 'data-' names
  $x = array_flip((isset($x[0]) && $x[0] == '*') ? str_replace('/', 'data-', explode('-', str_replace('data-', '/', $x))) : explode(',', $x. (!empty($C['safe']) ? ',on*' : '')));
  $C['deny_attribute'] = $x;
  // config URLs
  $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, tel, telnet'. (empty($C['safe']) ? ', app, javascript; *: data, javascript, ' : '; *:'). 'file, http, https';
  $C['schemes'] = array();
  foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){
    // pad so that a segment without ':' (e.g. after a trailing ';') does not read an undefined index
    list($x, $x2) = array_pad(explode(':', $v, 2), 2, null);
    if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));}
  }
  if(!isset($C['schemes']['*'])){
    $C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1);
    if(empty($C['safe'])){$C['schemes']['*'] += array('data'=>1, 'javascript'=>1);}
  }
  // in 'safe' mode with no explicit 'style' schemes, the '!' marker makes hl_prot() deny every URL there
  if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);}
  $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
  if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){
    $C['base_url'] = $C['abs_url'] = 0;
  }
  // config rest
  $C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
  $C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or htmLawed::hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or htmLawed::hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
  $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
  $C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
  $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
  $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
  $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
  $C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
  $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
  $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
  $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
  $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
  $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
  $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
  $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
  $C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
  $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
  $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
  $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
  $C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
  $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
  $C['unique_ids'] = isset($C['unique_ids']) && (!preg_match('`\W`', $C['unique_ids'])) ? $C['unique_ids'] : 1;
  $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

  // publish $C and $S as globals for the callbacks; remember prior values so they can be restored
  if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];}
  $GLOBALS['C'] = $C;
  $S = is_array($S) ? $S : htmLawed::hl_spec($S);
  if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];}
  $GLOBALS['S'] = $S;

  // drop control characters other than tab, LF and CR; bytes \x01-\x06 are used below as internal markers
  $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
  if($C['clean_ms_char']){
    // replacements are numeric entities so the result stays ASCII; the entity pass below re-validates them
    $x = array("\x7f"=>'', "\x80"=>'&#8364;', "\x81"=>'', "\x83"=>'&#402;', "\x85"=>'&#8230;', "\x86"=>'&#8224;', "\x87"=>'&#8225;', "\x88"=>'&#710;', "\x89"=>'&#8240;', "\x8a"=>'&#352;', "\x8b"=>'&#8249;', "\x8c"=>'&#338;', "\x8d"=>'', "\x8e"=>'&#381;', "\x8f"=>'', "\x90"=>'', "\x95"=>'&#8226;', "\x96"=>'&#8211;', "\x97"=>'&#8212;', "\x98"=>'&#732;', "\x99"=>'&#8482;', "\x9a"=>'&#353;', "\x9b"=>'&#8250;', "\x9c"=>'&#339;', "\x9d"=>'', "\x9e"=>'&#382;', "\x9f"=>'&#376;');
    // value 1 keeps typographic quotes (as entities); any other truthy value downgrades them to plain quotes
    $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'&#8218;', "\x84"=>'&#8222;', "\x91"=>'&#8216;', "\x92"=>'&#8217;', "\x93"=>'&#8220;', "\x94"=>'&#8221;') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
    $t = strtr($t, $x);
  }
  if($C['cdata'] or $C['comment']){$t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'htmLawed::hl_cmtcd', $t);}
  // escape every '&' first, then let hl_ent() turn well-formed entity references back into entities
  $t = preg_replace_callback('`&amp;([a-zA-Z][a-zA-Z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'htmLawed::hl_ent', str_replace('&', '&amp;', $t));
  if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();}
  if($C['hook']){$t = $C['hook']($t, $C, $S);}
  if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){
    $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime());
  }
  // main
  $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'htmLawed::hl_tag', $t);
  $t = $C['balance'] ? htmLawed::hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
  // turn the comment/CDATA marker bytes written by hl_cmtcd() back into real characters
  $t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
  $t = $C['tidy'] ? htmLawed::hl_tidy($t, $C['tidy'], $C['parent']) : $t;
  unset($C, $e);
  if(isset($reC)){$GLOBALS['C'] = $reC;}
  if(isset($reS)){$GLOBALS['S'] = $reS;}
  return $t;
}
||
| 116 | |||
/**
 * Validate an attribute value against the rules given for it in the spec.
 *
 * Values of accesskey/class/itemtype/rel are split on spaces and srcset on commas;
 * each piece is trimmed and checked on its own, and pieces failing a rule are dropped.
 * Empty pieces are never checked and are always kept.
 *
 * @param string $a Attribute name.
 * @param string $t Attribute value.
 * @param array  $p Rules: maxlen, minlen, maxval, minval, match, nomatch, oneof, noneof, default.
 * @return mixed The surviving value, or $p['default'] (else 0) when nothing survives.
 */
public static function hl_attrval($a, $t, $p){
  static $multi = array('accesskey', 'class', 'itemtype', 'rel');
  if(in_array($a, $multi)){$sep = ' ';}
  elseif($a == 'srcset'){$sep = ',';}
  else{$sep = '';}

  $pieces = !empty($sep) ? explode($sep, $t) : array($t);
  $kept = array();
  foreach($pieces as $piece){
    $piece = trim($piece);
    $len = strlen($piece);
    $pass = 1;
    if($len){
      foreach($p as $rule=>$arg){
        switch($rule){
          case 'maxlen':
            if($len > $arg){$pass = 0;}
            break;
          case 'minlen':
            if($len < $arg){$pass = 0;}
            break;
          case 'maxval':
            if((float)($piece) > $arg){$pass = 0;}
            break;
          case 'minval':
            if((float)($piece) < $arg){$pass = 0;}
            break;
          case 'match':
            if(!preg_match($arg, $piece)){$pass = 0;}
            break;
          case 'nomatch':
            if(preg_match($arg, $piece)){$pass = 0;}
            break;
          case 'oneof':
            // loose comparison against each '|'-separated alternative
            $pass = in_array($piece, explode('|', $arg)) ? 1 : 0;
            break;
          case 'noneof':
            $pass = in_array($piece, explode('|', $arg)) ? 0 : 1;
            break;
          default:
            break;
        }
        // first failing rule rejects the piece
        if(!$pass){break;}
      }
    }
    if($pass){$kept[] = $piece;}
  }

  // srcset candidates are re-joined with a comma and a space
  $joined = implode($sep == ',' ? ', ' : $sep, $kept);
  if(isset($joined[0])){return $joined;}
  return isset($p['default']) ? $p['default'] : 0;
}
||
| 153 | |||
/**
 * Balance tags and enforce element nesting rules.
 *
 * The text is split on '<' and walked piece by piece while a stack ($q) of open
 * non-empty elements is maintained. Output is collected through an output buffer.
 * Reads $GLOBALS['C']['direct_list_nest'].
 *
 * @param string $t  Text whose tags have already been filtered.
 * @param int    $do How disallowed tags/content are treated (the 'keep_bad' setting):
 *                   1 echoes a bad tag with its brackets escaped; 3 and 5 do so only where
 *                   plain text is allowed; values below 3 always keep text; values above 4
 *                   keep only the whitespace of text that is not allowed.
 * @param string $in Name of the element the text will be placed in; falls back to 'div'
 *                   when it is not a known element.
 * @return string Balanced text.
 */
public static function hl_bal($t, $do=1, $in='div'){
  // by content
  $cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
  $cE = array('area'=>1, 'br'=>1, 'col'=>1, 'command'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'keygen'=>1, 'link'=>1, 'meta'=>1, 'param'=>1, 'source'=>1, 'track'=>1, 'wbr'=>1); // Empty
  $cF = array('a'=>1, 'article'=>1, 'aside'=>1, 'audio'=>1, 'button'=>1, 'canvas'=>1, 'del'=>1, 'details'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'figure'=>1, 'footer'=>1, 'header'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'main'=>1, 'menu'=>1, 'nav'=>1, 'noscript'=>1, 'object'=>1, 'section'=>1, 'style'=>1, 'td'=>1, 'th'=>1, 'video'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
  $cI = array('abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdi'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'data'=>1, 'datalist'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'figcaption'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hgroup'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'mark'=>1, 'meter'=>1, 'output'=>1, 'p'=>1, 'pre'=>1, 'progress'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'summary'=>1, 'sup'=>1, 'time'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
  $cN = array('a'=>array('a'=>1, 'address'=>1, 'button'=>1, 'details'=>1, 'embed'=>1, 'keygen'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'address'=>array('address'=>1, 'article'=>1, 'aside'=>1, 'header'=>1, 'keygen'=>1, 'footer'=>1, 'nav'=>1, 'section'=>1), 'button'=>array('a'=>1, 'address'=>1, 'button'=>1, 'details'=>1, 'embed'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'keygen'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'footer'=>array('header'=>1, 'footer'=>1), 'form'=>array('form'=>1), 'header'=>array('header'=>1, 'footer'=>1), 'label'=>array('label'=>1), 'main'=>array('main'=>1), 'meter'=>array('meter'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'progress'=>array('progress'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1), 'time'=>array('time'=>1), ); // Illegal
  $cN2 = array_keys($cN);
  $cS = array('colgroup'=>array('col'=>1), 'datalist'=>array('option'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'hgroup'=>array('h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child
  // with direct_list_nest, lists may directly contain other lists
  if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] = $cS['menu'] += array('menu'=>1, 'ol'=>1, 'ul'=>1);}
  $cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'audio'=>array('source'=>1, 'track'=>1), 'blockquote'=>array('script'=>1), 'details'=>array('summary'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'figure'=>array('figcaption'=>1),'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1), 'video'=>array('source'=>1, 'track'=>1)); // Other
  $cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing
  // block/inline type; a/ins/del both type; #pcdata: text
  $eB = array('a'=>1, 'address'=>1, 'article'=>1, 'aside'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'details'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'figure'=>1, 'footer'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'header'=>1, 'hr'=>1, 'isindex'=>1, 'main'=>1, 'menu'=>1, 'nav'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'section'=>1, 'style'=>1, 'table'=>1, 'ul'=>1);
  $eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'audio'=>1, 'b'=>1, 'bdi'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'canvas'=>1, 'cite'=>1, 'code'=>1, 'command'=>1, 'data'=>1, 'datalist'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'figcaption'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'link'=>1, 'map'=>1, 'mark'=>1, 'meta'=>1, 'meter'=>1, 'object'=>1, 'output'=>1, 'progress'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'summary'=>1, 'sup'=>1, 'textarea'=>1, 'time'=>1, 'tt'=>1, 'u'=>1, 'var'=>1, 'video'=>1, 'wbr'=>1);
  $eN = array('a'=>1, 'address'=>1, 'article'=>1, 'aside'=>1, 'big'=>1, 'button'=>1, 'details'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'footer'=>1, 'form'=>1, 'header'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'keygen'=>1, 'label'=>1, 'meter'=>1, 'nav'=>1, 'object'=>1, 'progress'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'time'=>1); // Exclude from specific ele; $cN values
  $eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'command'=>1, 'dd'=>1, 'dt'=>1, 'hgroup'=>1, 'keygen'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'source'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1, 'track'=>1); // Missing in $eB & $eI
  $eF = $eB + $eI;

  // $in sets allowed child
  $in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
  if(isset($cE[$in])){
    // an empty element cannot hold markup: drop everything, or neutralise the angle brackets
    return (!$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t));
  }
  if(isset($cS[$in])){$inOk = $cS[$in];}
  elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
  elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);}
  elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);}
  if(isset($cO[$in])){$inOk = $inOk + $cO[$in];}
  if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);}

  $t = explode('<', $t);
  $ok = $q = array(); // $q seq list of open non-empty ele
  ob_start();

  // Each pass first flushes what the previous pass left pending ($e = a rejected tag, $x = trailing text),
  // then parses piece $i. Accepted tags unset $e so that nothing is flushed for them.
  for($i=-1, $ci=count($t); ++$i<$ci;){
    // allowed $ok in parent $p
    if($ql = count($q)){
      $p = array_pop($q);
      $q[] = $p;
      if(isset($cS[$p])){$ok = $cS[$p];}
      elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
      elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
      elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
      if(isset($cO[$p])){$ok = $ok + $cO[$p];}
      if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
    }else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
    // bad tags, & ele content
    if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
      // show the rejected tag as text; brackets must be entities or it would be live markup again
      echo '&lt;', $s, $e, $a, '&gt;';
    }
    if(isset($x[0])){
      if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
        echo '<div>', $x, '</div>';
      }
      elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
      elseif(strpos($x, "\x02\x04")){
        // text not allowed here: keep only spans delimited by the \x01\x02 marker bytes
        foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
          echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
        }
      }elseif($do > 4){echo preg_replace('`\S`', '', $x);}
    }
    // get markup
    if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;}
    $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r;
    // close tag
    if($s){
      if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen
      if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open
      $add = ''; // Nesting - close open tags that need to be
      for($j=-1, $cj=count($q); ++$j<$cj;){
        if(($d = array_pop($q)) == $e){break;}
        else{$add .= "</{$d}>";}
      }
      echo $add, '</', $e, '>'; unset($e); continue;
    }
    // open tag
    // $cB ele needs $eB ele as child
    if(isset($cB[$e]) && strlen(trim($x))){
      // re-queue the tag without its text and insert a <div> piece carrying the text after it
      $t[$i] = "{$e}{$a}>";
      array_splice($t, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue;
    }
    if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){
      // insert a <div> piece ahead of the current one and re-process from there
      array_splice($t, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue;
    }
    // if no open ele, $in = parent; mostly immediate parent-child relation should hold
    if(!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)){
      if(!isset($ok[$e])){
        // parent's closing tag is omissible: close it and retry this piece under the grandparent
        if($ql && isset($cT[$p])){echo '</', array_pop($q), '>'; unset($e, $x); --$i;}
        continue;
      }
      if(!isset($cE[$e])){$q[] = $e;}
      echo '<', $e, $a, '>'; unset($e); continue;
    }
    // specific parent-child
    if(isset($cS[$p][$e])){
      if(!isset($cE[$e])){$q[] = $e;}
      echo '<', $e, $a, '>'; unset($e); continue;
    }
    // nesting
    $add = '';
    $q2 = array();
    // walk the open stack from the outermost element; at the first ancestor that
    // cannot contain $e, close it and everything opened after it
    for($k=-1, $kc=count($q); ++$k<$kc;){
      $d = $q[$k];
      $ok2 = array();
      if(isset($cS[$d])){$q2[] = $d; continue;}
      $ok2 = isset($cI[$d]) ? $eI : $eF;
      if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];}
      if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);}
      if(!isset($ok2[$e])){
        if(!$k && !isset($inOk[$e])){continue 2;}
        $add = "</{$d}>";
        for(;++$k<$kc;){$add = "</{$q[$k]}>{$add}";}
        break;
      }
      else{$q2[] = $d;}
    }
    $q = $q2;
    if(!isset($cE[$e])){$q[] = $e;}
    echo $add, '<', $e, $a, '>'; unset($e); continue;
  }

  // end: flush whatever the last pass left pending, using the same rules as inside the loop
  if($ql = count($q)){
    $p = array_pop($q);
    $q[] = $p;
    if(isset($cS[$p])){$ok = $cS[$p];}
    elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
    elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
    elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
    if(isset($cO[$p])){$ok = $ok + $cO[$p];}
    if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
  }else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
  if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
    echo '&lt;', $s, $e, $a, '&gt;';
  }
  if(isset($x[0])){
    if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
      echo '<div>', $x, '</div>';
    }
    elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
    elseif(strpos($x, "\x02\x04")){
      foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
        echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
      }
    }elseif($do > 4){echo preg_replace('`\S`', '', $x);}
  }
  // close every element still open, innermost first
  while(!empty($q) && ($e = array_pop($q))){echo '</', $e, '>';}
  $o = ob_get_contents();
  ob_end_clean();
  return $o;
}
||
| 307 | |||
/**
 * preg_replace_callback handler for HTML comments and CDATA sections.
 *
 * Behaviour depends on $C['comment'] or $C['cdata'] (read from the global $C):
 * 0 returns the section untouched, 1 removes it, 2 keeps it with '&', '<' and '>'
 * turned into entities. Other values keep the content as is. A kept section is returned
 * wrapped in \x01..\x05 marker bytes instead of literal '&', '<' and '>'.
 *
 * @param array $t Regex match; $t[0] is the whole comment/CDATA section.
 * @return string Replacement text.
 */
public static function hl_cmtcd($t){
  $t = $t[0];
  global $C;
  // the 4th character tells a comment ('<!--') from a CDATA section ('<![C')
  if(!($v = $C[$n = $t[3] == '-' ? 'comment' : 'cdata'])){return $t;}
  if($v == 1){return '';}
  if($n == 'comment' && $v < 4){
    // collapse runs of '-' inside the comment body and make sure it ends with a space
    if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';}
  }
  else{$t = substr($t, 1, -1);}
  // mode 2: neutralise markup-significant characters in the content
  $t = $v == 2 ? str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t) : $t;
  return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), ($n == 'comment' ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01"));
}
||
| 321 | |||
/**
 * preg_replace_callback handler for character entity references.
 *
 * Uses $C['and_mark'], $C['named_entity'] and $C['hexdec_entity'] from the global $C.
 * Unknown names and disallowed code points are returned with the ampersand escaped
 * ("amp;" inserted after the leading character).
 *
 * @param array $t Regex match; $t[1] is the reference without the leading '&' and trailing ';'.
 * @return string Replacement text.
 */
public static function hl_ent($t){
  global $C;
  $t = $t[1];
  // always kept by name
  static $basic = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
  // recognised names and their decimal code points
  static $named = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830', 'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253', 'thorn'=>'254', 'yuml'=>'255');

  // leading character: \x06 stands in for '&' when and_mark is on
  $lead = $C['and_mark'] ? "\x06" : '&';

  // named reference
  if($t[0] != '#'){
    if(isset($basic[$t])){
      $body = $t;
    }
    elseif(!isset($named[$t])){
      // unknown name: escape the ampersand
      $body = 'amp;'. $t;
    }
    elseif(!$C['named_entity']){
      // convert to numeric form, hexadecimal when hexdec_entity > 1
      $body = '#'. ($C['hexdec_entity'] > 1 ? 'x'. dechex($named[$t]) : $named[$t]);
    }
    else{
      $body = $t;
    }
    return $lead. $body. ';';
  }

  // numeric reference: all-digit text is decimal, otherwise the part after the 'x' is hexadecimal
  $t = substr($t, 1);
  $n = ctype_digit($t) ? intval($t) : hexdec(substr($t, 1));
  $bad = $n < 9
    or ($n > 13 && $n < 32)
    or $n == 11
    or $n == 12
    or ($n > 126 && $n < 160 && $n != 133)
    or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111));
  if($n < 9 or ($n > 13 && $n < 32) or $n == 11 or $n == 12 or ($n > 126 && $n < 160 && $n != 133) or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111))){
    // disallowed code point: escape the ampersand
    return $lead. "amp;#{$t};";
  }

  // keep decimal form when it was decimal and hexdec_entity < 2, or when hexdec_entity is off
  $asDecimal = (ctype_digit($t) && $C['hexdec_entity'] < 2) or !$C['hexdec_entity'];
  if((ctype_digit($t) && $C['hexdec_entity'] < 2) or !$C['hexdec_entity']){
    return $lead. '#'. $n. ';';
  }
  return $lead. '#'. 'x'. dechex($n). ';';
}
||
| 336 | |||
/**
 * Check the scheme of a URL against $C['schemes'] and optionally rewrite the URL
 * as absolute or relative according to $C['abs_url'] / $C['base_url'].
 *
 * @param string|array $p URL; or, when $c is null, a regex match array in which
 *                        $p[2] is the URL and $p[1] / $p[3] are the text before and after it.
 * @param string|null  $c Key into $C['schemes'] (falls back to '*'); null selects 'style'.
 * @return string The URL (prefixed with 'denied:' when its scheme is not allowed),
 *                surrounded by the before/after text when given.
 */
public static function hl_prot($p, $c=null){
  global $C;
  static $denied = 'denied:';
  $pre = '';
  $post = '';
  if($c == null){
    $c = 'style';
    $pre = $p[1];
    $post = $p[3];
    $p = trim($p[2]);
  }
  $allowed = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];

  // '!' in the scheme list denies everything
  if(isset($allowed['!']) && substr($p, 0, 7) != $denied){
    $p = $denied. $p;
  }
  // All ok, frag, query, param
  if(isset($allowed['*']) or !strcspn($p, '#?;') or (substr($p, 0, 7) == $denied)){
    return $pre. $p. $post;
  }
  // Denied prot
  $scheme = array();
  if(preg_match('`^([^:?[@!$()*,=/\'\]]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $scheme) && !isset($allowed[strtolower($scheme[1])])){
    return $pre. $denied. $p. $post;
  }
  if(!$C['abs_url']){
    return $pre. $p. $post;
  }

  if($C['abs_url'] == -1 && strpos($p, $C['base_url']) === 0){
    // Make url rel
    $p = substr($p, strlen($C['base_url']));
  }
  elseif(empty($scheme[1])){
    // Make URL abs
    if(substr($p, 0, 2) == '//'){
      // protocol-relative: borrow the scheme of the base URL
      $p = substr($C['base_url'], 0, strpos($C['base_url'], ':')+1). $p;
    }
    elseif($p[0] == '/'){
      // root-relative: prepend scheme and host of the base URL
      $p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $p;
    }
    elseif(strcspn($p, './')){
      $p = $C['base_url']. $p;
    }
    else{
      // starts with '.' or '/': resolve './' and '../' segments against the base path
      preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $base);
      $p = preg_replace('`(?<=/)\./`', '', $base[2]. $p);
      $upDir = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
      while(preg_match($upDir, $p)){
        $p = preg_replace($upDir, '', $p);
      }
      $p = $base[1]. $p;
    }
  }
  return $pre. $p. $post;
}
||
| 368 | |||
/**
 * Check whether a string is a usable PCRE pattern by trial-running it on an empty subject.
 *
 * Error display is switched off during the trial and restored to its previous value afterwards.
 * Where error_clear_last()/error_get_last() exist they are used to detect a failure;
 * otherwise the legacy track_errors / $php_errormsg mechanism is used.
 *
 * @param string $p Pattern including delimiters and modifiers.
 * @return int 1 when the pattern compiled and ran without error, 0 otherwise (also for an empty pattern).
 */
public static function hl_regex($p){
  if(empty($p)){return 0;}
  if($v = function_exists('error_clear_last') && function_exists('error_get_last')){error_clear_last();}
  else{
    // legacy path: remember the track_errors setting and any pending message so both can be restored
    if($t = ini_get('track_errors')){$o = isset($php_errormsg) ? $php_errormsg : null;}
    else{ini_set('track_errors', 1);}
    unset($php_errormsg);
  }
  if(($d = ini_get('display_errors'))){ini_set('display_errors', 0);}
  // preg_match() returns false on failure; checking it keeps detection working
  // even when an error handler prevents error_get_last() from recording the warning
  $m = preg_match($p, '');
  if($v){$r = ($m !== false && error_get_last() == null) ? 1 : 0; }
  else{
    $r = ($m !== false && !isset($php_errormsg)) ? 1 : 0;
    if($t){$php_errormsg = isset($o) ? $o : null;}
    else{ini_set('track_errors', 0);}
  }
  // put back the caller's actual setting (it may be e.g. 'stderr'), not a hard-coded 1
  if($d){ini_set('display_errors', $d);}
  return $r;
}
||
| 389 | |||
public static function hl_spec($t){
// final $spec
// Parses a spec string of the form "elem1,elem2=attr1,attr2(rule=value/rule=value),-attr3;..."
// into an array keyed by lower-cased element name. Each element maps attribute
// names to 1 (no rules) or to an array of rule => value pairs. Attributes
// prefixed with "-" are collected under the element's 'n' key.
// 'match'/'nomatch' rules whose value fails htmLawed::hl_regex() are dropped.
    $s = array();

    // Helper for double-quoted segments: swaps the characters that have a
    // structural meaning in the spec for control bytes, turns the `" escape
    // into a plain ", and strips the surrounding quotes.
    if(!function_exists('hl_aux1')){function hl_aux1($m){
        return substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", '"'), $m[0]), 1, -1);
    }}

    // Control bytes written by hl_aux1 and the characters they stand for.
    $ctl = array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08");
    $raw = array(";", "|", "~", " ", ",", "/", "(", ")");

    $t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', 'hl_aux1', trim($t)));
    $t = explode(';', $t);

    // Sections are handled last-to-first, as before, so merge order is unchanged.
    for($i = count($t); --$i >= 0;){
        $w = $t[$i];
        if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;}

        $y = $n = array(); // $y: permitted attributes (with rules); $n: denied attributes
        foreach(explode(',', $a) as $v){
            if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;}
            if(($x = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;}
            if($x[0] == '-'){$n[substr($x, 1)] = 1; continue;}
            if(!isset($m[2])){$y[$x] = 1; continue;}

            foreach(explode('/', $m[2]) as $rule){
                // Empty piece, no '=', or a name shorter than 5 characters:
                // the attribute is recorded as unrestricted.
                if(empty($rule) or ($p = strpos($rule, '=')) == 0 or $p < 5){$y[$x] = 1; continue;}
                // The attribute was already recorded as unrestricted (scalar 1).
                // Indexing into that scalar is a fatal Error on PHP 8 and was an
                // ignored write on older versions, so keep the scalar and move on.
                if(isset($y[$x]) && !is_array($y[$x])){continue;}
                $y[$x][strtolower(substr($rule, 0, $p))] = str_replace($ctl, $raw, substr($rule, $p+1));
            }

            if(isset($y[$x]['match']) && !htmLawed::hl_regex($y[$x]['match'])){unset($y[$x]['match']);}
            if(isset($y[$x]['nomatch']) && !htmLawed::hl_regex($y[$x]['nomatch'])){unset($y[$x]['nomatch']);}
        }
        if(!count($y) && !count($n)){continue;}

        // Apply the collected attribute data to every element named before '='.
        foreach(explode(',', substr($w, 0, $e)) as $v){
            if(!strlen(($v = strtolower($v)))){continue;}
            if(count($y)){
                if(!isset($s[$v])){$s[$v] = $y;}
                else{$s[$v] = array_merge($s[$v], $y);}
            }
            if(count($n)){
                if(!isset($s[$v]['n'])){$s[$v]['n'] = $n;}
                else{$s[$v]['n'] = array_merge($s[$v]['n'], $n);}
            }
        }
    }
    return $s;
}
||
| 422 | |||
| 423 | public static function hl_tag($t){ |
||
| 645 | } |
||
| 646 | |||
public static function hl_tag2(&$e, &$a, $t=1){
// transform tag
// Rewrites a deprecated element name held in $e to its replacement and returns
// the CSS declarations that stand in for it ('' when none are needed).
// For 'font', the color/size/face attributes are removed from the attribute
// string $a and converted to CSS. Both $e and $a are modified by reference.
// When $t is 2 and the element is not one of those handled here, $e is set to
// 0 and 0 is returned.
    if($e == 'big'){$e = 'span'; return 'font-size: larger;';}
    if($e == 's' or $e == 'strike'){$e = 'span'; return 'text-decoration: line-through;';}
    if($e == 'tt'){$e = 'code'; return '';}
    if($e == 'center'){$e = 'div'; return 'text-align: center;';}

    // <font size="..."> value => CSS font-size value
    static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');

    if($e == 'font'){
        $a2 = '';
        // Pull out color= and size= one at a time, blanking each in $a.
        while(preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $a, $m)){
            $a = str_replace($m[0], ' ', $a);
            $val = trim($m[4]);
            if(strtolower($m[2]) == 'color'){
                $a2 .= ' color: '. str_replace('"', '\'', $val). ';';
            }elseif(isset($fs[$val])){
                // Append once only; the earlier nested ".=" duplicated the
                // whole accumulated style string here.
                $a2 .= ' font-size: '. str_replace('"', '\'', $fs[$val]). ';';
            }
        }
        // Then face=, quoted or unquoted.
        while(preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $a, $m) or preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $a, $m)){
            $a = str_replace($m[0], ' ', $a);
            $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[3])). ';';
        }
        $e = 'span';
        return ltrim(str_replace('<', '', $a2));
    }

    if($e == 'acronym'){$e = 'abbr'; return '';}
    if($e == 'dir'){$e = 'ul'; return '';}
    if($t == 2){$e = 0; return 0;}
    return '';
}
||
| 671 | |||
| 672 | public static function hl_tidy($t, $w, $p){ |
||
| 673 | // tidy/compact HTM |
||
| 674 | if(strpos(' pre,script,textarea', "$p,")){return $t;} |
||
| 675 | if(!function_exists('hl_aux2')){function hl_aux2($m){ |
||
| 677 | }} |
||
| 678 | $t = preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), 'hl_aux2', $t)); |
||
| 679 | if(($w = strtolower($w)) == -1){ |
||
| 680 | return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); |
||
| 681 | } |
||
| 682 | $s = strpos(" $w", 't') ? "\t" : ' '; |
||
| 683 | $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2)); |
||
| 684 | $N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; |
||
| 685 | $a = array('br'=>1); |
||
| 686 | $b = array('button'=>1, 'command'=>1, 'input'=>1, 'option'=>1, 'param'=>1, 'track'=>1); |
||
| 687 | $c = array('audio'=>1, 'canvas'=>1, 'caption'=>1, 'dd'=>1, 'dt'=>1, 'figcaption'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'style'=>1, 'summary'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1, 'video'=>1); |
||
| 688 | $d = array('address'=>1, 'article'=>1, 'aside'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'datalist'=>1, 'details'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'figure'=>1, 'footer'=>1, 'form'=>1, 'header'=>1, 'hgroup'=>1, 'hr'=>1, 'iframe'=>1, 'main'=>1, 'map'=>1, 'menu'=>1, 'nav'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'section'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); |
||
| 689 | $T = explode('<', $t); |
||
| 690 | $X = 1; |
||
| 691 | while($X){ |
||
| 692 | $n = $N; |
||
| 693 | $t = $T; |
||
| 694 | ob_start(); |
||
| 695 | if(isset($d[$p])){echo str_repeat($s, ++$n);} |
||
| 696 | echo ltrim(array_shift($t)); |
||
| 697 | for($i=-1, $j=count($t); ++$i<$j;){ |
||
| 698 | $r = ''; list($e, $r) = explode('>', $t[$i]); |
||
| 699 | $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1)); |
||
| 700 | $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0); |
||
| 701 | $e = "<$e>"; |
||
| 702 | if(isset($d[$y])){ |
||
| 703 | if(!$x){ |
||
| 704 | if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);} |
||
| 705 | else{++$N; ob_end_clean(); continue 2;} |
||
| 706 | } |
||
| 707 | else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));} |
||
| 708 | echo $r; continue; |
||
| 709 | } |
||
| 710 | $f = "\n". str_repeat($s, $n); |
||
| 711 | if(isset($c[$y])){ |
||
| 712 | if(!$x){echo $e, $f, $r;} |
||
| 713 | else{echo $f, $e, $r;} |
||
| 714 | }elseif(isset($b[$y])){echo $f, $e, $r; |
||
| 715 | }elseif(isset($a[$y])){echo $e, $f, $r; |
||
| 716 | }elseif(!$y){echo $f, $e, $f, $r; |
||
| 717 | }else{echo $e, $r;} |
||
| 718 | } |
||
| 719 | $X = 0; |
||
| 720 | } |
||
| 721 | $t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents())); |
||
| 722 | ob_end_clean(); |
||
| 723 | if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){ |
||
| 724 | $t = str_replace("\n", $l, $t); |
||
| 725 | } |
||
| 726 | return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); |
||
| 727 | } |
||
| 728 | |||
| 729 | public static function hl_version(){ |
||
| 732 | } |
||
| 733 | |||
| 736 |