@@ -3,158 +3,158 @@ |
||
| 3 | 3 | class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy |
| 4 | 4 | { |
| 5 | 5 | |
| 6 | - public function makeFixes() { |
|
| 7 | - |
|
| 8 | - $r = array(); |
|
| 9 | - |
|
| 10 | - // == deprecated tag transforms =================================== |
|
| 11 | - |
|
| 12 | - $r['font'] = new HTMLPurifier_TagTransform_Font(); |
|
| 13 | - $r['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); |
|
| 14 | - $r['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); |
|
| 15 | - $r['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;'); |
|
| 16 | - $r['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;'); |
|
| 17 | - $r['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); |
|
| 18 | - $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); |
|
| 19 | - |
|
| 20 | - // == deprecated attribute transforms ============================= |
|
| 21 | - |
|
| 22 | - $r['caption@align'] = |
|
| 23 | - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 24 | - // we're following IE's behavior, not Firefox's, due |
|
| 25 | - // to the fact that no one supports caption-side:right, |
|
| 26 | - // W3C included (with CSS 2.1). This is a slightly |
|
| 27 | - // unreasonable attribute! |
|
| 28 | - 'left' => 'text-align:left;', |
|
| 29 | - 'right' => 'text-align:right;', |
|
| 30 | - 'top' => 'caption-side:top;', |
|
| 31 | - 'bottom' => 'caption-side:bottom;' // not supported by IE |
|
| 32 | - )); |
|
| 33 | - |
|
| 34 | - // @align for img ------------------------------------------------- |
|
| 35 | - $r['img@align'] = |
|
| 36 | - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 37 | - 'left' => 'float:left;', |
|
| 38 | - 'right' => 'float:right;', |
|
| 39 | - 'top' => 'vertical-align:top;', |
|
| 40 | - 'middle' => 'vertical-align:middle;', |
|
| 41 | - 'bottom' => 'vertical-align:baseline;', |
|
| 42 | - )); |
|
| 43 | - |
|
| 44 | - // @align for table ----------------------------------------------- |
|
| 45 | - $r['table@align'] = |
|
| 46 | - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 47 | - 'left' => 'float:left;', |
|
| 48 | - 'center' => 'margin-left:auto;margin-right:auto;', |
|
| 49 | - 'right' => 'float:right;' |
|
| 50 | - )); |
|
| 51 | - |
|
| 52 | - // @align for hr ----------------------------------------------- |
|
| 53 | - $r['hr@align'] = |
|
| 54 | - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 55 | - // we use both text-align and margin because these work |
|
| 56 | - // for different browsers (IE and Firefox, respectively) |
|
| 57 | - // and the melange makes for a pretty cross-compatible |
|
| 58 | - // solution |
|
| 59 | - 'left' => 'margin-left:0;margin-right:auto;text-align:left;', |
|
| 60 | - 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', |
|
| 61 | - 'right' => 'margin-left:auto;margin-right:0;text-align:right;' |
|
| 62 | - )); |
|
| 63 | - |
|
| 64 | - // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- |
|
| 65 | - // {{{ |
|
| 66 | - $align_lookup = array(); |
|
| 67 | - $align_values = array('left', 'right', 'center', 'justify'); |
|
| 68 | - foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; |
|
| 69 | - // }}} |
|
| 70 | - $r['h1@align'] = |
|
| 71 | - $r['h2@align'] = |
|
| 72 | - $r['h3@align'] = |
|
| 73 | - $r['h4@align'] = |
|
| 74 | - $r['h5@align'] = |
|
| 75 | - $r['h6@align'] = |
|
| 76 | - $r['p@align'] = |
|
| 77 | - $r['div@align'] = |
|
| 78 | - new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); |
|
| 79 | - |
|
| 80 | - // @bgcolor for table, tr, td, th --------------------------------- |
|
| 81 | - $r['table@bgcolor'] = |
|
| 82 | - $r['td@bgcolor'] = |
|
| 83 | - $r['th@bgcolor'] = |
|
| 84 | - new HTMLPurifier_AttrTransform_BgColor(); |
|
| 85 | - |
|
| 86 | - // @border for img ------------------------------------------------ |
|
| 87 | - $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); |
|
| 88 | - |
|
| 89 | - // @clear for br -------------------------------------------------- |
|
| 90 | - $r['br@clear'] = |
|
| 91 | - new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( |
|
| 92 | - 'left' => 'clear:left;', |
|
| 93 | - 'right' => 'clear:right;', |
|
| 94 | - 'all' => 'clear:both;', |
|
| 95 | - 'none' => 'clear:none;', |
|
| 96 | - )); |
|
| 97 | - |
|
| 98 | - // @height for td, th --------------------------------------------- |
|
| 99 | - $r['td@height'] = |
|
| 100 | - $r['th@height'] = |
|
| 101 | - new HTMLPurifier_AttrTransform_Length('height'); |
|
| 102 | - |
|
| 103 | - // @hspace for img ------------------------------------------------ |
|
| 104 | - $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); |
|
| 105 | - |
|
| 106 | - // @noshade for hr ------------------------------------------------ |
|
| 107 | - // this transformation is not precise but often good enough. |
|
| 108 | - // different browsers use different styles to designate noshade |
|
| 109 | - $r['hr@noshade'] = |
|
| 110 | - new HTMLPurifier_AttrTransform_BoolToCSS( |
|
| 111 | - 'noshade', |
|
| 112 | - 'color:#808080;background-color:#808080;border:0;' |
|
| 113 | - ); |
|
| 114 | - |
|
| 115 | - // @nowrap for td, th --------------------------------------------- |
|
| 116 | - $r['td@nowrap'] = |
|
| 117 | - $r['th@nowrap'] = |
|
| 118 | - new HTMLPurifier_AttrTransform_BoolToCSS( |
|
| 119 | - 'nowrap', |
|
| 120 | - 'white-space:nowrap;' |
|
| 121 | - ); |
|
| 122 | - |
|
| 123 | - // @size for hr -------------------------------------------------- |
|
| 124 | - $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); |
|
| 125 | - |
|
| 126 | - // @type for li, ol, ul ------------------------------------------- |
|
| 127 | - // {{{ |
|
| 128 | - $ul_types = array( |
|
| 129 | - 'disc' => 'list-style-type:disc;', |
|
| 130 | - 'square' => 'list-style-type:square;', |
|
| 131 | - 'circle' => 'list-style-type:circle;' |
|
| 132 | - ); |
|
| 133 | - $ol_types = array( |
|
| 134 | - '1' => 'list-style-type:decimal;', |
|
| 135 | - 'i' => 'list-style-type:lower-roman;', |
|
| 136 | - 'I' => 'list-style-type:upper-roman;', |
|
| 137 | - 'a' => 'list-style-type:lower-alpha;', |
|
| 138 | - 'A' => 'list-style-type:upper-alpha;' |
|
| 139 | - ); |
|
| 140 | - $li_types = $ul_types + $ol_types; |
|
| 141 | - // }}} |
|
| 142 | - |
|
| 143 | - $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); |
|
| 144 | - $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); |
|
| 145 | - $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); |
|
| 146 | - |
|
| 147 | - // @vspace for img ------------------------------------------------ |
|
| 148 | - $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); |
|
| 149 | - |
|
| 150 | - // @width for hr, td, th ------------------------------------------ |
|
| 151 | - $r['td@width'] = |
|
| 152 | - $r['th@width'] = |
|
| 153 | - $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); |
|
| 154 | - |
|
| 155 | - return $r; |
|
| 156 | - |
|
| 157 | - } |
|
| 6 | + public function makeFixes() { |
|
| 7 | + |
|
| 8 | + $r = array(); |
|
| 9 | + |
|
| 10 | + // == deprecated tag transforms =================================== |
|
| 11 | + |
|
| 12 | + $r['font'] = new HTMLPurifier_TagTransform_Font(); |
|
| 13 | + $r['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); |
|
| 14 | + $r['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); |
|
| 15 | + $r['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;'); |
|
| 16 | + $r['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;'); |
|
| 17 | + $r['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); |
|
| 18 | + $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); |
|
| 19 | + |
|
| 20 | + // == deprecated attribute transforms ============================= |
|
| 21 | + |
|
| 22 | + $r['caption@align'] = |
|
| 23 | + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 24 | + // we're following IE's behavior, not Firefox's, due |
|
| 25 | + // to the fact that no one supports caption-side:right, |
|
| 26 | + // W3C included (with CSS 2.1). This is a slightly |
|
| 27 | + // unreasonable attribute! |
|
| 28 | + 'left' => 'text-align:left;', |
|
| 29 | + 'right' => 'text-align:right;', |
|
| 30 | + 'top' => 'caption-side:top;', |
|
| 31 | + 'bottom' => 'caption-side:bottom;' // not supported by IE |
|
| 32 | + )); |
|
| 33 | + |
|
| 34 | + // @align for img ------------------------------------------------- |
|
| 35 | + $r['img@align'] = |
|
| 36 | + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 37 | + 'left' => 'float:left;', |
|
| 38 | + 'right' => 'float:right;', |
|
| 39 | + 'top' => 'vertical-align:top;', |
|
| 40 | + 'middle' => 'vertical-align:middle;', |
|
| 41 | + 'bottom' => 'vertical-align:baseline;', |
|
| 42 | + )); |
|
| 43 | + |
|
| 44 | + // @align for table ----------------------------------------------- |
|
| 45 | + $r['table@align'] = |
|
| 46 | + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 47 | + 'left' => 'float:left;', |
|
| 48 | + 'center' => 'margin-left:auto;margin-right:auto;', |
|
| 49 | + 'right' => 'float:right;' |
|
| 50 | + )); |
|
| 51 | + |
|
| 52 | + // @align for hr ----------------------------------------------- |
|
| 53 | + $r['hr@align'] = |
|
| 54 | + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( |
|
| 55 | + // we use both text-align and margin because these work |
|
| 56 | + // for different browsers (IE and Firefox, respectively) |
|
| 57 | + // and the melange makes for a pretty cross-compatible |
|
| 58 | + // solution |
|
| 59 | + 'left' => 'margin-left:0;margin-right:auto;text-align:left;', |
|
| 60 | + 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', |
|
| 61 | + 'right' => 'margin-left:auto;margin-right:0;text-align:right;' |
|
| 62 | + )); |
|
| 63 | + |
|
| 64 | + // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- |
|
| 65 | + // {{{ |
|
| 66 | + $align_lookup = array(); |
|
| 67 | + $align_values = array('left', 'right', 'center', 'justify'); |
|
| 68 | + foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; |
|
| 69 | + // }}} |
|
| 70 | + $r['h1@align'] = |
|
| 71 | + $r['h2@align'] = |
|
| 72 | + $r['h3@align'] = |
|
| 73 | + $r['h4@align'] = |
|
| 74 | + $r['h5@align'] = |
|
| 75 | + $r['h6@align'] = |
|
| 76 | + $r['p@align'] = |
|
| 77 | + $r['div@align'] = |
|
| 78 | + new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); |
|
| 79 | + |
|
| 80 | + // @bgcolor for table, td, th ------------------------------------- |
|
| 81 | + $r['table@bgcolor'] = |
|
| 82 | + $r['td@bgcolor'] = |
|
| 83 | + $r['th@bgcolor'] = |
|
| 84 | + new HTMLPurifier_AttrTransform_BgColor(); |
|
| 85 | + |
|
| 86 | + // @border for img ------------------------------------------------ |
|
| 87 | + $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); |
|
| 88 | + |
|
| 89 | + // @clear for br -------------------------------------------------- |
|
| 90 | + $r['br@clear'] = |
|
| 91 | + new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( |
|
| 92 | + 'left' => 'clear:left;', |
|
| 93 | + 'right' => 'clear:right;', |
|
| 94 | + 'all' => 'clear:both;', |
|
| 95 | + 'none' => 'clear:none;', |
|
| 96 | + )); |
|
| 97 | + |
|
| 98 | + // @height for td, th --------------------------------------------- |
|
| 99 | + $r['td@height'] = |
|
| 100 | + $r['th@height'] = |
|
| 101 | + new HTMLPurifier_AttrTransform_Length('height'); |
|
| 102 | + |
|
| 103 | + // @hspace for img ------------------------------------------------ |
|
| 104 | + $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); |
|
| 105 | + |
|
| 106 | + // @noshade for hr ------------------------------------------------ |
|
| 107 | + // this transformation is not precise but often good enough. |
|
| 108 | + // different browsers use different styles to designate noshade |
|
| 109 | + $r['hr@noshade'] = |
|
| 110 | + new HTMLPurifier_AttrTransform_BoolToCSS( |
|
| 111 | + 'noshade', |
|
| 112 | + 'color:#808080;background-color:#808080;border:0;' |
|
| 113 | + ); |
|
| 114 | + |
|
| 115 | + // @nowrap for td, th --------------------------------------------- |
|
| 116 | + $r['td@nowrap'] = |
|
| 117 | + $r['th@nowrap'] = |
|
| 118 | + new HTMLPurifier_AttrTransform_BoolToCSS( |
|
| 119 | + 'nowrap', |
|
| 120 | + 'white-space:nowrap;' |
|
| 121 | + ); |
|
| 122 | + |
|
| 123 | + // @size for hr -------------------------------------------------- |
|
| 124 | + $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); |
|
| 125 | + |
|
| 126 | + // @type for li, ol, ul ------------------------------------------- |
|
| 127 | + // {{{ |
|
| 128 | + $ul_types = array( |
|
| 129 | + 'disc' => 'list-style-type:disc;', |
|
| 130 | + 'square' => 'list-style-type:square;', |
|
| 131 | + 'circle' => 'list-style-type:circle;' |
|
| 132 | + ); |
|
| 133 | + $ol_types = array( |
|
| 134 | + '1' => 'list-style-type:decimal;', |
|
| 135 | + 'i' => 'list-style-type:lower-roman;', |
|
| 136 | + 'I' => 'list-style-type:upper-roman;', |
|
| 137 | + 'a' => 'list-style-type:lower-alpha;', |
|
| 138 | + 'A' => 'list-style-type:upper-alpha;' |
|
| 139 | + ); |
|
| 140 | + $li_types = $ul_types + $ol_types; |
|
| 141 | + // }}} |
|
| 142 | + |
|
| 143 | + $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); |
|
| 144 | + $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); |
|
| 145 | + $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); |
|
| 146 | + |
|
| 147 | + // @vspace for img ------------------------------------------------ |
|
| 148 | + $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); |
|
| 149 | + |
|
| 150 | + // @width for hr, td, th ------------------------------------------ |
|
| 151 | + $r['td@width'] = |
|
| 152 | + $r['th@width'] = |
|
| 153 | + $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); |
|
| 154 | + |
|
| 155 | + return $r; |
|
| 156 | + |
|
| 157 | + } |
|
| 158 | 158 | |
| 159 | 159 | } |
| 160 | 160 | |
@@ -12,7 +12,7 @@ |
||
| 12 | 12 | $r['font'] = new HTMLPurifier_TagTransform_Font(); |
| 13 | 13 | $r['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); |
| 14 | 14 | $r['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); |
| 15 | - $r['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;'); |
|
| 15 | + $r['center'] = new HTMLPurifier_TagTransform_Simple('div', 'text-align:center;'); |
|
| 16 | 16 | $r['u'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;'); |
| 17 | 17 | $r['s'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); |
| 18 | 18 | $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;'); |
@@ -65,7 +65,9 @@ |
||
| 65 | 65 | // {{{ |
| 66 | 66 | $align_lookup = array(); |
| 67 | 67 | $align_values = array('left', 'right', 'center', 'justify'); |
| 68 | - foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; |
|
| 68 | + foreach ($align_values as $v) { |
|
| 69 | + $align_lookup[$v] = "text-align:$v;"; |
|
| 70 | + } |
|
| 69 | 71 | // }}} |
| 70 | 72 | $r['h1@align'] = |
| 71 | 73 | $r['h2@align'] = |
@@ -2,13 +2,13 @@ |
||
| 2 | 2 | |
| 3 | 3 | class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule |
| 4 | 4 | { |
| 5 | - public $name = 'XMLCommonAttributes'; |
|
| 5 | + public $name = 'XMLCommonAttributes'; |
|
| 6 | 6 | |
| 7 | - public $attr_collections = array( |
|
| 8 | - 'Lang' => array( |
|
| 9 | - 'xml:lang' => 'LanguageCode', |
|
| 10 | - ) |
|
| 11 | - ); |
|
| 7 | + public $attr_collections = array( |
|
| 8 | + 'Lang' => array( |
|
| 9 | + 'xml:lang' => 'LanguageCode', |
|
| 10 | + ) |
|
| 11 | + ); |
|
| 12 | 12 | } |
| 13 | 13 | |
| 14 | 14 | // vim: et sw=4 sts=4 |
@@ -9,336 +9,336 @@ |
||
| 9 | 9 | class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector |
| 10 | 10 | { |
| 11 | 11 | |
| 12 | - public $name = 'AutoParagraph'; |
|
| 13 | - public $needed = array('p'); |
|
| 14 | - |
|
| 15 | - private function _pStart() { |
|
| 16 | - $par = new HTMLPurifier_Token_Start('p'); |
|
| 17 | - $par->armor['MakeWellFormed_TagClosedError'] = true; |
|
| 18 | - return $par; |
|
| 19 | - } |
|
| 20 | - |
|
| 21 | - public function handleText(&$token) { |
|
| 22 | - $text = $token->data; |
|
| 23 | - // Does the current parent allow <p> tags? |
|
| 24 | - if ($this->allowsElement('p')) { |
|
| 25 | - if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) { |
|
| 26 | - // Note that we have differing behavior when dealing with text |
|
| 27 | - // in the anonymous root node, or a node inside the document. |
|
| 28 | - // If the text as a double-newline, the treatment is the same; |
|
| 29 | - // if it doesn't, see the next if-block if you're in the document. |
|
| 30 | - |
|
| 31 | - $i = $nesting = null; |
|
| 32 | - if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) { |
|
| 33 | - // State 1.1: ... ^ (whitespace, then document end) |
|
| 34 | - // ---- |
|
| 35 | - // This is a degenerate case |
|
| 36 | - } else { |
|
| 37 | - if (!$token->is_whitespace || $this->_isInline($current)) { |
|
| 38 | - // State 1.2: PAR1 |
|
| 39 | - // ---- |
|
| 40 | - |
|
| 41 | - // State 1.3: PAR1\n\nPAR2 |
|
| 42 | - // ------------ |
|
| 43 | - |
|
| 44 | - // State 1.4: <div>PAR1\n\nPAR2 (see State 2) |
|
| 45 | - // ------------ |
|
| 46 | - $token = array($this->_pStart()); |
|
| 47 | - $this->_splitText($text, $token); |
|
| 48 | - } else { |
|
| 49 | - // State 1.5: \n<hr /> |
|
| 50 | - // -- |
|
| 51 | - } |
|
| 52 | - } |
|
| 53 | - } else { |
|
| 54 | - // State 2: <div>PAR1... (similar to 1.4) |
|
| 55 | - // ---- |
|
| 56 | - |
|
| 57 | - // We're in an element that allows paragraph tags, but we're not |
|
| 58 | - // sure if we're going to need them. |
|
| 59 | - if ($this->_pLookAhead()) { |
|
| 60 | - // State 2.1: <div>PAR1<b>PAR1\n\nPAR2 |
|
| 61 | - // ---- |
|
| 62 | - // Note: This will always be the first child, since any |
|
| 63 | - // previous inline element would have triggered this very |
|
| 64 | - // same routine, and found the double newline. One possible |
|
| 65 | - // exception would be a comment. |
|
| 66 | - $token = array($this->_pStart(), $token); |
|
| 67 | - } else { |
|
| 68 | - // State 2.2.1: <div>PAR1<div> |
|
| 69 | - // ---- |
|
| 70 | - |
|
| 71 | - // State 2.2.2: <div>PAR1<b>PAR1</b></div> |
|
| 72 | - // ---- |
|
| 73 | - } |
|
| 74 | - } |
|
| 75 | - // Is the current parent a <p> tag? |
|
| 76 | - } elseif ( |
|
| 77 | - !empty($this->currentNesting) && |
|
| 78 | - $this->currentNesting[count($this->currentNesting)-1]->name == 'p' |
|
| 79 | - ) { |
|
| 80 | - // State 3.1: ...<p>PAR1 |
|
| 81 | - // ---- |
|
| 82 | - |
|
| 83 | - // State 3.2: ...<p>PAR1\n\nPAR2 |
|
| 84 | - // ------------ |
|
| 85 | - $token = array(); |
|
| 86 | - $this->_splitText($text, $token); |
|
| 87 | - // Abort! |
|
| 88 | - } else { |
|
| 89 | - // State 4.1: ...<b>PAR1 |
|
| 90 | - // ---- |
|
| 91 | - |
|
| 92 | - // State 4.2: ...<b>PAR1\n\nPAR2 |
|
| 93 | - // ------------ |
|
| 94 | - } |
|
| 95 | - } |
|
| 96 | - |
|
| 97 | - public function handleElement(&$token) { |
|
| 98 | - // We don't have to check if we're already in a <p> tag for block |
|
| 99 | - // tokens, because the tag would have been autoclosed by MakeWellFormed. |
|
| 100 | - if ($this->allowsElement('p')) { |
|
| 101 | - if (!empty($this->currentNesting)) { |
|
| 102 | - if ($this->_isInline($token)) { |
|
| 103 | - // State 1: <div>...<b> |
|
| 104 | - // --- |
|
| 105 | - |
|
| 106 | - // Check if this token is adjacent to the parent token |
|
| 107 | - // (seek backwards until token isn't whitespace) |
|
| 108 | - $i = null; |
|
| 109 | - $this->backward($i, $prev); |
|
| 110 | - |
|
| 111 | - if (!$prev instanceof HTMLPurifier_Token_Start) { |
|
| 112 | - // Token wasn't adjacent |
|
| 113 | - |
|
| 114 | - if ( |
|
| 115 | - $prev instanceof HTMLPurifier_Token_Text && |
|
| 116 | - substr($prev->data, -2) === "\n\n" |
|
| 117 | - ) { |
|
| 118 | - // State 1.1.4: <div><p>PAR1</p>\n\n<b> |
|
| 119 | - // --- |
|
| 120 | - |
|
| 121 | - // Quite frankly, this should be handled by splitText |
|
| 122 | - $token = array($this->_pStart(), $token); |
|
| 123 | - } else { |
|
| 124 | - // State 1.1.1: <div><p>PAR1</p><b> |
|
| 125 | - // --- |
|
| 126 | - |
|
| 127 | - // State 1.1.2: <div><br /><b> |
|
| 128 | - // --- |
|
| 129 | - |
|
| 130 | - // State 1.1.3: <div>PAR<b> |
|
| 131 | - // --- |
|
| 132 | - } |
|
| 133 | - |
|
| 134 | - } else { |
|
| 135 | - // State 1.2.1: <div><b> |
|
| 136 | - // --- |
|
| 137 | - |
|
| 138 | - // Lookahead to see if <p> is needed. |
|
| 139 | - if ($this->_pLookAhead()) { |
|
| 140 | - // State 1.3.1: <div><b>PAR1\n\nPAR2 |
|
| 141 | - // --- |
|
| 142 | - $token = array($this->_pStart(), $token); |
|
| 143 | - } else { |
|
| 144 | - // State 1.3.2: <div><b>PAR1</b></div> |
|
| 145 | - // --- |
|
| 146 | - |
|
| 147 | - // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> |
|
| 148 | - // --- |
|
| 149 | - } |
|
| 150 | - } |
|
| 151 | - } else { |
|
| 152 | - // State 2.3: ...<div> |
|
| 153 | - // ----- |
|
| 154 | - } |
|
| 155 | - } else { |
|
| 156 | - if ($this->_isInline($token)) { |
|
| 157 | - // State 3.1: <b> |
|
| 158 | - // --- |
|
| 159 | - // This is where the {p} tag is inserted, not reflected in |
|
| 160 | - // inputTokens yet, however. |
|
| 161 | - $token = array($this->_pStart(), $token); |
|
| 162 | - } else { |
|
| 163 | - // State 3.2: <div> |
|
| 164 | - // ----- |
|
| 165 | - } |
|
| 166 | - |
|
| 167 | - $i = null; |
|
| 168 | - if ($this->backward($i, $prev)) { |
|
| 169 | - if ( |
|
| 170 | - !$prev instanceof HTMLPurifier_Token_Text |
|
| 171 | - ) { |
|
| 172 | - // State 3.1.1: ...</p>{p}<b> |
|
| 173 | - // --- |
|
| 174 | - |
|
| 175 | - // State 3.2.1: ...</p><div> |
|
| 176 | - // ----- |
|
| 177 | - |
|
| 178 | - if (!is_array($token)) $token = array($token); |
|
| 179 | - array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); |
|
| 180 | - } else { |
|
| 181 | - // State 3.1.2: ...</p>\n\n{p}<b> |
|
| 182 | - // --- |
|
| 183 | - |
|
| 184 | - // State 3.2.2: ...</p>\n\n<div> |
|
| 185 | - // ----- |
|
| 186 | - |
|
| 187 | - // Note: PAR<ELEM> cannot occur because PAR would have been |
|
| 188 | - // wrapped in <p> tags. |
|
| 189 | - } |
|
| 190 | - } |
|
| 191 | - } |
|
| 192 | - } else { |
|
| 193 | - // State 2.2: <ul><li> |
|
| 194 | - // ---- |
|
| 195 | - |
|
| 196 | - // State 2.4: <p><b> |
|
| 197 | - // --- |
|
| 198 | - } |
|
| 199 | - } |
|
| 200 | - |
|
| 201 | - /** |
|
| 202 | - * Splits up a text in paragraph tokens and appends them |
|
| 203 | - * to the result stream that will replace the original |
|
| 204 | - * @param $data String text data that will be processed |
|
| 205 | - * into paragraphs |
|
| 206 | - * @param $result Reference to array of tokens that the |
|
| 207 | - * tags will be appended onto |
|
| 208 | - * @param $config Instance of HTMLPurifier_Config |
|
| 209 | - * @param $context Instance of HTMLPurifier_Context |
|
| 210 | - */ |
|
| 211 | - private function _splitText($data, &$result) { |
|
| 212 | - $raw_paragraphs = explode("\n\n", $data); |
|
| 213 | - $paragraphs = array(); // without empty paragraphs |
|
| 214 | - $needs_start = false; |
|
| 215 | - $needs_end = false; |
|
| 216 | - |
|
| 217 | - $c = count($raw_paragraphs); |
|
| 218 | - if ($c == 1) { |
|
| 219 | - // There were no double-newlines, abort quickly. In theory this |
|
| 220 | - // should never happen. |
|
| 221 | - $result[] = new HTMLPurifier_Token_Text($data); |
|
| 222 | - return; |
|
| 223 | - } |
|
| 224 | - for ($i = 0; $i < $c; $i++) { |
|
| 225 | - $par = $raw_paragraphs[$i]; |
|
| 226 | - if (trim($par) !== '') { |
|
| 227 | - $paragraphs[] = $par; |
|
| 228 | - } else { |
|
| 229 | - if ($i == 0) { |
|
| 230 | - // Double newline at the front |
|
| 231 | - if (empty($result)) { |
|
| 232 | - // The empty result indicates that the AutoParagraph |
|
| 233 | - // injector did not add any start paragraph tokens. |
|
| 234 | - // This means that we have been in a paragraph for |
|
| 235 | - // a while, and the newline means we should start a new one. |
|
| 236 | - $result[] = new HTMLPurifier_Token_End('p'); |
|
| 237 | - $result[] = new HTMLPurifier_Token_Text("\n\n"); |
|
| 238 | - // However, the start token should only be added if |
|
| 239 | - // there is more processing to be done (i.e. there are |
|
| 240 | - // real paragraphs in here). If there are none, the |
|
| 241 | - // next start paragraph tag will be handled by the |
|
| 242 | - // next call to the injector |
|
| 243 | - $needs_start = true; |
|
| 244 | - } else { |
|
| 245 | - // We just started a new paragraph! |
|
| 246 | - // Reinstate a double-newline for presentation's sake, since |
|
| 247 | - // it was in the source code. |
|
| 248 | - array_unshift($result, new HTMLPurifier_Token_Text("\n\n")); |
|
| 249 | - } |
|
| 250 | - } elseif ($i + 1 == $c) { |
|
| 251 | - // Double newline at the end |
|
| 252 | - // There should be a trailing </p> when we're finally done. |
|
| 253 | - $needs_end = true; |
|
| 254 | - } |
|
| 255 | - } |
|
| 256 | - } |
|
| 257 | - |
|
| 258 | - // Check if this was just a giant blob of whitespace. Move this earlier, |
|
| 259 | - // perhaps? |
|
| 260 | - if (empty($paragraphs)) { |
|
| 261 | - return; |
|
| 262 | - } |
|
| 263 | - |
|
| 264 | - // Add the start tag indicated by \n\n at the beginning of $data |
|
| 265 | - if ($needs_start) { |
|
| 266 | - $result[] = $this->_pStart(); |
|
| 267 | - } |
|
| 268 | - |
|
| 269 | - // Append the paragraphs onto the result |
|
| 270 | - foreach ($paragraphs as $par) { |
|
| 271 | - $result[] = new HTMLPurifier_Token_Text($par); |
|
| 272 | - $result[] = new HTMLPurifier_Token_End('p'); |
|
| 273 | - $result[] = new HTMLPurifier_Token_Text("\n\n"); |
|
| 274 | - $result[] = $this->_pStart(); |
|
| 275 | - } |
|
| 276 | - |
|
| 277 | - // Remove trailing start token; Injector will handle this later if |
|
| 278 | - // it was indeed needed. This prevents from needing to do a lookahead, |
|
| 279 | - // at the cost of a lookbehind later. |
|
| 280 | - array_pop($result); |
|
| 281 | - |
|
| 282 | - // If there is no need for an end tag, remove all of it and let |
|
| 283 | - // MakeWellFormed close it later. |
|
| 284 | - if (!$needs_end) { |
|
| 285 | - array_pop($result); // removes \n\n |
|
| 286 | - array_pop($result); // removes </p> |
|
| 287 | - } |
|
| 288 | - |
|
| 289 | - } |
|
| 290 | - |
|
| 291 | - /** |
|
| 292 | - * Returns true if passed token is inline (and, ergo, allowed in |
|
| 293 | - * paragraph tags) |
|
| 294 | - */ |
|
| 295 | - private function _isInline($token) { |
|
| 296 | - return isset($this->htmlDefinition->info['p']->child->elements[$token->name]); |
|
| 297 | - } |
|
| 298 | - |
|
| 299 | - /** |
|
| 300 | - * Looks ahead in the token list and determines whether or not we need |
|
| 301 | - * to insert a <p> tag. |
|
| 302 | - */ |
|
| 303 | - private function _pLookAhead() { |
|
| 304 | - $this->current($i, $current); |
|
| 305 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; |
|
| 306 | - else $nesting = 0; |
|
| 307 | - $ok = false; |
|
| 308 | - while ($this->forwardUntilEndToken($i, $current, $nesting)) { |
|
| 309 | - $result = $this->_checkNeedsP($current); |
|
| 310 | - if ($result !== null) { |
|
| 311 | - $ok = $result; |
|
| 312 | - break; |
|
| 313 | - } |
|
| 314 | - } |
|
| 315 | - return $ok; |
|
| 316 | - } |
|
| 317 | - |
|
| 318 | - /** |
|
| 319 | - * Determines if a particular token requires an earlier inline token |
|
| 320 | - * to get a paragraph. This should be used with _forwardUntilEndToken |
|
| 321 | - */ |
|
| 322 | - private function _checkNeedsP($current) { |
|
| 323 | - if ($current instanceof HTMLPurifier_Token_Start){ |
|
| 324 | - if (!$this->_isInline($current)) { |
|
| 325 | - // <div>PAR1<div> |
|
| 326 | - // ---- |
|
| 327 | - // Terminate early, since we hit a block element |
|
| 328 | - return false; |
|
| 329 | - } |
|
| 330 | - } elseif ($current instanceof HTMLPurifier_Token_Text) { |
|
| 331 | - if (strpos($current->data, "\n\n") !== false) { |
|
| 332 | - // <div>PAR1<b>PAR1\n\nPAR2 |
|
| 333 | - // ---- |
|
| 334 | - return true; |
|
| 335 | - } else { |
|
| 336 | - // <div>PAR1<b>PAR1... |
|
| 337 | - // ---- |
|
| 338 | - } |
|
| 339 | - } |
|
| 340 | - return null; |
|
| 341 | - } |
|
| 12 | + public $name = 'AutoParagraph'; |
|
| 13 | + public $needed = array('p'); |
|
| 14 | + |
|
| 15 | + private function _pStart() { |
|
| 16 | + $par = new HTMLPurifier_Token_Start('p'); |
|
| 17 | + $par->armor['MakeWellFormed_TagClosedError'] = true; |
|
| 18 | + return $par; |
|
| 19 | + } |
|
| 20 | + |
|
| 21 | + public function handleText(&$token) { |
|
| 22 | + $text = $token->data; |
|
| 23 | + // Does the current parent allow <p> tags? |
|
| 24 | + if ($this->allowsElement('p')) { |
|
| 25 | + if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) { |
|
| 26 | + // Note that we have differing behavior when dealing with text |
|
| 27 | + // in the anonymous root node, or a node inside the document. |
|
| 28 | + // If the text has a double-newline, the treatment is the same; |
|
| 29 | + // if it doesn't, see the next if-block if you're in the document. |
|
| 30 | + |
|
| 31 | + $i = $nesting = null; |
|
| 32 | + if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) { |
|
| 33 | + // State 1.1: ... ^ (whitespace, then document end) |
|
| 34 | + // ---- |
|
| 35 | + // This is a degenerate case |
|
| 36 | + } else { |
|
| 37 | + if (!$token->is_whitespace || $this->_isInline($current)) { |
|
| 38 | + // State 1.2: PAR1 |
|
| 39 | + // ---- |
|
| 40 | + |
|
| 41 | + // State 1.3: PAR1\n\nPAR2 |
|
| 42 | + // ------------ |
|
| 43 | + |
|
| 44 | + // State 1.4: <div>PAR1\n\nPAR2 (see State 2) |
|
| 45 | + // ------------ |
|
| 46 | + $token = array($this->_pStart()); |
|
| 47 | + $this->_splitText($text, $token); |
|
| 48 | + } else { |
|
| 49 | + // State 1.5: \n<hr /> |
|
| 50 | + // -- |
|
| 51 | + } |
|
| 52 | + } |
|
| 53 | + } else { |
|
| 54 | + // State 2: <div>PAR1... (similar to 1.4) |
|
| 55 | + // ---- |
|
| 56 | + |
|
| 57 | + // We're in an element that allows paragraph tags, but we're not |
|
| 58 | + // sure if we're going to need them. |
|
| 59 | + if ($this->_pLookAhead()) { |
|
| 60 | + // State 2.1: <div>PAR1<b>PAR1\n\nPAR2 |
|
| 61 | + // ---- |
|
| 62 | + // Note: This will always be the first child, since any |
|
| 63 | + // previous inline element would have triggered this very |
|
| 64 | + // same routine, and found the double newline. One possible |
|
| 65 | + // exception would be a comment. |
|
| 66 | + $token = array($this->_pStart(), $token); |
|
| 67 | + } else { |
|
| 68 | + // State 2.2.1: <div>PAR1<div> |
|
| 69 | + // ---- |
|
| 70 | + |
|
| 71 | + // State 2.2.2: <div>PAR1<b>PAR1</b></div> |
|
| 72 | + // ---- |
|
| 73 | + } |
|
| 74 | + } |
|
| 75 | + // Is the current parent a <p> tag? |
|
| 76 | + } elseif ( |
|
| 77 | + !empty($this->currentNesting) && |
|
| 78 | + $this->currentNesting[count($this->currentNesting)-1]->name == 'p' |
|
| 79 | + ) { |
|
| 80 | + // State 3.1: ...<p>PAR1 |
|
| 81 | + // ---- |
|
| 82 | + |
|
| 83 | + // State 3.2: ...<p>PAR1\n\nPAR2 |
|
| 84 | + // ------------ |
|
| 85 | + $token = array(); |
|
| 86 | + $this->_splitText($text, $token); |
|
| 87 | + // Abort! |
|
| 88 | + } else { |
|
| 89 | + // State 4.1: ...<b>PAR1 |
|
| 90 | + // ---- |
|
| 91 | + |
|
| 92 | + // State 4.2: ...<b>PAR1\n\nPAR2 |
|
| 93 | + // ------------ |
|
| 94 | + } |
|
| 95 | + } |
|
| 96 | + |
|
| 97 | + public function handleElement(&$token) { |
|
| 98 | + // We don't have to check if we're already in a <p> tag for block |
|
| 99 | + // tokens, because the tag would have been autoclosed by MakeWellFormed. |
|
| 100 | + if ($this->allowsElement('p')) { |
|
| 101 | + if (!empty($this->currentNesting)) { |
|
| 102 | + if ($this->_isInline($token)) { |
|
| 103 | + // State 1: <div>...<b> |
|
| 104 | + // --- |
|
| 105 | + |
|
| 106 | + // Check if this token is adjacent to the parent token |
|
| 107 | + // (seek backwards until token isn't whitespace) |
|
| 108 | + $i = null; |
|
| 109 | + $this->backward($i, $prev); |
|
| 110 | + |
|
| 111 | + if (!$prev instanceof HTMLPurifier_Token_Start) { |
|
| 112 | + // Token wasn't adjacent |
|
| 113 | + |
|
| 114 | + if ( |
|
| 115 | + $prev instanceof HTMLPurifier_Token_Text && |
|
| 116 | + substr($prev->data, -2) === "\n\n" |
|
| 117 | + ) { |
|
| 118 | + // State 1.1.4: <div><p>PAR1</p>\n\n<b> |
|
| 119 | + // --- |
|
| 120 | + |
|
| 121 | + // Quite frankly, this should be handled by splitText |
|
| 122 | + $token = array($this->_pStart(), $token); |
|
| 123 | + } else { |
|
| 124 | + // State 1.1.1: <div><p>PAR1</p><b> |
|
| 125 | + // --- |
|
| 126 | + |
|
| 127 | + // State 1.1.2: <div><br /><b> |
|
| 128 | + // --- |
|
| 129 | + |
|
| 130 | + // State 1.1.3: <div>PAR<b> |
|
| 131 | + // --- |
|
| 132 | + } |
|
| 133 | + |
|
| 134 | + } else { |
|
| 135 | + // State 1.2.1: <div><b> |
|
| 136 | + // --- |
|
| 137 | + |
|
| 138 | + // Lookahead to see if <p> is needed. |
|
| 139 | + if ($this->_pLookAhead()) { |
|
| 140 | + // State 1.3.1: <div><b>PAR1\n\nPAR2 |
|
| 141 | + // --- |
|
| 142 | + $token = array($this->_pStart(), $token); |
|
| 143 | + } else { |
|
| 144 | + // State 1.3.2: <div><b>PAR1</b></div> |
|
| 145 | + // --- |
|
| 146 | + |
|
| 147 | + // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> |
|
| 148 | + // --- |
|
| 149 | + } |
|
| 150 | + } |
|
| 151 | + } else { |
|
| 152 | + // State 2.3: ...<div> |
|
| 153 | + // ----- |
|
| 154 | + } |
|
| 155 | + } else { |
|
| 156 | + if ($this->_isInline($token)) { |
|
| 157 | + // State 3.1: <b> |
|
| 158 | + // --- |
|
| 159 | + // This is where the {p} tag is inserted, not reflected in |
|
| 160 | + // inputTokens yet, however. |
|
| 161 | + $token = array($this->_pStart(), $token); |
|
| 162 | + } else { |
|
| 163 | + // State 3.2: <div> |
|
| 164 | + // ----- |
|
| 165 | + } |
|
| 166 | + |
|
| 167 | + $i = null; |
|
| 168 | + if ($this->backward($i, $prev)) { |
|
| 169 | + if ( |
|
| 170 | + !$prev instanceof HTMLPurifier_Token_Text |
|
| 171 | + ) { |
|
| 172 | + // State 3.1.1: ...</p>{p}<b> |
|
| 173 | + // --- |
|
| 174 | + |
|
| 175 | + // State 3.2.1: ...</p><div> |
|
| 176 | + // ----- |
|
| 177 | + |
|
| 178 | + if (!is_array($token)) $token = array($token); |
|
| 179 | + array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); |
|
| 180 | + } else { |
|
| 181 | + // State 3.1.2: ...</p>\n\n{p}<b> |
|
| 182 | + // --- |
|
| 183 | + |
|
| 184 | + // State 3.2.2: ...</p>\n\n<div> |
|
| 185 | + // ----- |
|
| 186 | + |
|
| 187 | + // Note: PAR<ELEM> cannot occur because PAR would have been |
|
| 188 | + // wrapped in <p> tags. |
|
| 189 | + } |
|
| 190 | + } |
|
| 191 | + } |
|
| 192 | + } else { |
|
| 193 | + // State 2.2: <ul><li> |
|
| 194 | + // ---- |
|
| 195 | + |
|
| 196 | + // State 2.4: <p><b> |
|
| 197 | + // --- |
|
| 198 | + } |
|
| 199 | + } |
|
| 200 | + |
|
| 201 | + /** |
|
| 202 | + * Splits up a text in paragraph tokens and appends them |
|
| 203 | + * to the result stream that will replace the original |
|
| 204 | + * @param $data String text data that will be processed |
|
| 205 | + * into paragraphs |
|
| 206 | + * @param $result Reference to array of tokens that the |
|
| 207 | + * tags will be appended onto |
|
| 208 | + * @param $config Instance of HTMLPurifier_Config |
|
| 209 | + * @param $context Instance of HTMLPurifier_Context |
|
| 210 | + */ |
|
| 211 | + private function _splitText($data, &$result) { |
|
| 212 | + $raw_paragraphs = explode("\n\n", $data); |
|
| 213 | + $paragraphs = array(); // without empty paragraphs |
|
| 214 | + $needs_start = false; |
|
| 215 | + $needs_end = false; |
|
| 216 | + |
|
| 217 | + $c = count($raw_paragraphs); |
|
| 218 | + if ($c == 1) { |
|
| 219 | + // There were no double-newlines, abort quickly. In theory this |
|
| 220 | + // should never happen. |
|
| 221 | + $result[] = new HTMLPurifier_Token_Text($data); |
|
| 222 | + return; |
|
| 223 | + } |
|
| 224 | + for ($i = 0; $i < $c; $i++) { |
|
| 225 | + $par = $raw_paragraphs[$i]; |
|
| 226 | + if (trim($par) !== '') { |
|
| 227 | + $paragraphs[] = $par; |
|
| 228 | + } else { |
|
| 229 | + if ($i == 0) { |
|
| 230 | + // Double newline at the front |
|
| 231 | + if (empty($result)) { |
|
| 232 | + // The empty result indicates that the AutoParagraph |
|
| 233 | + // injector did not add any start paragraph tokens. |
|
| 234 | + // This means that we have been in a paragraph for |
|
| 235 | + // a while, and the newline means we should start a new one. |
|
| 236 | + $result[] = new HTMLPurifier_Token_End('p'); |
|
| 237 | + $result[] = new HTMLPurifier_Token_Text("\n\n"); |
|
| 238 | + // However, the start token should only be added if |
|
| 239 | + // there is more processing to be done (i.e. there are |
|
| 240 | + // real paragraphs in here). If there are none, the |
|
| 241 | + // next start paragraph tag will be handled by the |
|
| 242 | + // next call to the injector |
|
| 243 | + $needs_start = true; |
|
| 244 | + } else { |
|
| 245 | + // We just started a new paragraph! |
|
| 246 | + // Reinstate a double-newline for presentation's sake, since |
|
| 247 | + // it was in the source code. |
|
| 248 | + array_unshift($result, new HTMLPurifier_Token_Text("\n\n")); |
|
| 249 | + } |
|
| 250 | + } elseif ($i + 1 == $c) { |
|
| 251 | + // Double newline at the end |
|
| 252 | + // There should be a trailing </p> when we're finally done. |
|
| 253 | + $needs_end = true; |
|
| 254 | + } |
|
| 255 | + } |
|
| 256 | + } |
|
| 257 | + |
|
| 258 | + // Check if this was just a giant blob of whitespace. Move this earlier, |
|
| 259 | + // perhaps? |
|
| 260 | + if (empty($paragraphs)) { |
|
| 261 | + return; |
|
| 262 | + } |
|
| 263 | + |
|
| 264 | + // Add the start tag indicated by \n\n at the beginning of $data |
|
| 265 | + if ($needs_start) { |
|
| 266 | + $result[] = $this->_pStart(); |
|
| 267 | + } |
|
| 268 | + |
|
| 269 | + // Append the paragraphs onto the result |
|
| 270 | + foreach ($paragraphs as $par) { |
|
| 271 | + $result[] = new HTMLPurifier_Token_Text($par); |
|
| 272 | + $result[] = new HTMLPurifier_Token_End('p'); |
|
| 273 | + $result[] = new HTMLPurifier_Token_Text("\n\n"); |
|
| 274 | + $result[] = $this->_pStart(); |
|
| 275 | + } |
|
| 276 | + |
|
| 277 | + // Remove trailing start token; Injector will handle this later if |
|
| 278 | + // it was indeed needed. This prevents from needing to do a lookahead, |
|
| 279 | + // at the cost of a lookbehind later. |
|
| 280 | + array_pop($result); |
|
| 281 | + |
|
| 282 | + // If there is no need for an end tag, remove all of it and let |
|
| 283 | + // MakeWellFormed close it later. |
|
| 284 | + if (!$needs_end) { |
|
| 285 | + array_pop($result); // removes \n\n |
|
| 286 | + array_pop($result); // removes </p> |
|
| 287 | + } |
|
| 288 | + |
|
| 289 | + } |
|
| 290 | + |
|
| 291 | + /** |
|
| 292 | + * Returns true if passed token is inline (and, ergo, allowed in |
|
| 293 | + * paragraph tags) |
|
| 294 | + */ |
|
| 295 | + private function _isInline($token) { |
|
| 296 | + return isset($this->htmlDefinition->info['p']->child->elements[$token->name]); |
|
| 297 | + } |
|
| 298 | + |
|
| 299 | + /** |
|
| 300 | + * Looks ahead in the token list and determines whether or not we need |
|
| 301 | + * to insert a <p> tag. |
|
| 302 | + */ |
|
| 303 | + private function _pLookAhead() { |
|
| 304 | + $this->current($i, $current); |
|
| 305 | + if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; |
|
| 306 | + else $nesting = 0; |
|
| 307 | + $ok = false; |
|
| 308 | + while ($this->forwardUntilEndToken($i, $current, $nesting)) { |
|
| 309 | + $result = $this->_checkNeedsP($current); |
|
| 310 | + if ($result !== null) { |
|
| 311 | + $ok = $result; |
|
| 312 | + break; |
|
| 313 | + } |
|
| 314 | + } |
|
| 315 | + return $ok; |
|
| 316 | + } |
|
| 317 | + |
|
| 318 | + /** |
|
| 319 | + * Determines if a particular token requires an earlier inline token |
|
| 320 | + * to get a paragraph. This should be used with _forwardUntilEndToken |
|
| 321 | + */ |
|
| 322 | + private function _checkNeedsP($current) { |
|
| 323 | + if ($current instanceof HTMLPurifier_Token_Start){ |
|
| 324 | + if (!$this->_isInline($current)) { |
|
| 325 | + // <div>PAR1<div> |
|
| 326 | + // ---- |
|
| 327 | + // Terminate early, since we hit a block element |
|
| 328 | + return false; |
|
| 329 | + } |
|
| 330 | + } elseif ($current instanceof HTMLPurifier_Token_Text) { |
|
| 331 | + if (strpos($current->data, "\n\n") !== false) { |
|
| 332 | + // <div>PAR1<b>PAR1\n\nPAR2 |
|
| 333 | + // ---- |
|
| 334 | + return true; |
|
| 335 | + } else { |
|
| 336 | + // <div>PAR1<b>PAR1... |
|
| 337 | + // ---- |
|
| 338 | + } |
|
| 339 | + } |
|
| 340 | + return null; |
|
| 341 | + } |
|
| 342 | 342 | |
| 343 | 343 | } |
| 344 | 344 | |
@@ -75,7 +75,7 @@ discard block |
||
| 75 | 75 | // Is the current parent a <p> tag? |
| 76 | 76 | } elseif ( |
| 77 | 77 | !empty($this->currentNesting) && |
| 78 | - $this->currentNesting[count($this->currentNesting)-1]->name == 'p' |
|
| 78 | + $this->currentNesting[count($this->currentNesting) - 1]->name == 'p' |
|
| 79 | 79 | ) { |
| 80 | 80 | // State 3.1: ...<p>PAR1 |
| 81 | 81 | // ---- |
@@ -320,7 +320,7 @@ discard block |
||
| 320 | 320 | * to get a paragraph. This should be used with _forwardUntilEndToken |
| 321 | 321 | */ |
| 322 | 322 | private function _checkNeedsP($current) { |
| 323 | - if ($current instanceof HTMLPurifier_Token_Start){ |
|
| 323 | + if ($current instanceof HTMLPurifier_Token_Start) { |
|
| 324 | 324 | if (!$this->_isInline($current)) { |
| 325 | 325 | // <div>PAR1<div> |
| 326 | 326 | // ---- |
@@ -175,7 +175,9 @@ discard block |
||
| 175 | 175 | // State 3.2.1: ...</p><div> |
| 176 | 176 | // ----- |
| 177 | 177 | |
| 178 | - if (!is_array($token)) $token = array($token); |
|
| 178 | + if (!is_array($token)) { |
|
| 179 | + $token = array($token); |
|
| 180 | + } |
|
| 179 | 181 | array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); |
| 180 | 182 | } else { |
| 181 | 183 | // State 3.1.2: ...</p>\n\n{p}<b> |
@@ -302,8 +304,11 @@ discard block |
||
| 302 | 304 | */ |
| 303 | 305 | private function _pLookAhead() { |
| 304 | 306 | $this->current($i, $current); |
| 305 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; |
|
| 306 | - else $nesting = 0; |
|
| 307 | + if ($current instanceof HTMLPurifier_Token_Start) { |
|
| 308 | + $nesting = 1; |
|
| 309 | + } else { |
|
| 310 | + $nesting = 0; |
|
| 311 | + } |
|
| 307 | 312 | $ok = false; |
| 308 | 313 | while ($this->forwardUntilEndToken($i, $current, $nesting)) { |
| 309 | 314 | $result = $this->_checkNeedsP($current); |
@@ -6,21 +6,21 @@ |
||
| 6 | 6 | class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector |
| 7 | 7 | { |
| 8 | 8 | |
| 9 | - public $name = 'DisplayLinkURI'; |
|
| 10 | - public $needed = array('a'); |
|
| 9 | + public $name = 'DisplayLinkURI'; |
|
| 10 | + public $needed = array('a'); |
|
| 11 | 11 | |
| 12 | - public function handleElement(&$token) { |
|
| 13 | - } |
|
| 12 | + public function handleElement(&$token) { |
|
| 13 | + } |
|
| 14 | 14 | |
| 15 | - public function handleEnd(&$token) { |
|
| 16 | - if (isset($token->start->attr['href'])){ |
|
| 17 | - $url = $token->start->attr['href']; |
|
| 18 | - unset($token->start->attr['href']); |
|
| 19 | - $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); |
|
| 20 | - } else { |
|
| 21 | - // nothing to display |
|
| 22 | - } |
|
| 23 | - } |
|
| 15 | + public function handleEnd(&$token) { |
|
| 16 | + if (isset($token->start->attr['href'])){ |
|
| 17 | + $url = $token->start->attr['href']; |
|
| 18 | + unset($token->start->attr['href']); |
|
| 19 | + $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); |
|
| 20 | + } else { |
|
| 21 | + // nothing to display |
|
| 22 | + } |
|
| 23 | + } |
|
| 24 | 24 | } |
| 25 | 25 | |
| 26 | 26 | // vim: et sw=4 sts=4 |
@@ -13,7 +13,7 @@ |
||
| 13 | 13 | } |
| 14 | 14 | |
| 15 | 15 | public function handleEnd(&$token) { |
| 16 | - if (isset($token->start->attr['href'])){ |
|
| 16 | + if (isset($token->start->attr['href'])) { |
|
| 17 | 17 | $url = $token->start->attr['href']; |
| 18 | 18 | unset($token->start->attr['href']); |
| 19 | 19 | $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); |
@@ -6,40 +6,40 @@ |
||
| 6 | 6 | class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector |
| 7 | 7 | { |
| 8 | 8 | |
| 9 | - public $name = 'Linkify'; |
|
| 10 | - public $needed = array('a' => array('href')); |
|
| 11 | - |
|
| 12 | - public function handleText(&$token) { |
|
| 13 | - if (!$this->allowsElement('a')) return; |
|
| 14 | - |
|
| 15 | - if (strpos($token->data, '://') === false) { |
|
| 16 | - // our really quick heuristic failed, abort |
|
| 17 | - // this may not work so well if we want to match things like |
|
| 18 | - // "google.com", but then again, most people don't |
|
| 19 | - return; |
|
| 20 | - } |
|
| 21 | - |
|
| 22 | - // there is/are URL(s). Let's split the string: |
|
| 23 | - // Note: this regex is extremely permissive |
|
| 24 | - $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 25 | - |
|
| 26 | - $token = array(); |
|
| 27 | - |
|
| 28 | - // $i = index |
|
| 29 | - // $c = count |
|
| 30 | - // $l = is link |
|
| 31 | - for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { |
|
| 32 | - if (!$l) { |
|
| 33 | - if ($bits[$i] === '') continue; |
|
| 34 | - $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
|
| 35 | - } else { |
|
| 36 | - $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); |
|
| 37 | - $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
|
| 38 | - $token[] = new HTMLPurifier_Token_End('a'); |
|
| 39 | - } |
|
| 40 | - } |
|
| 41 | - |
|
| 42 | - } |
|
| 9 | + public $name = 'Linkify'; |
|
| 10 | + public $needed = array('a' => array('href')); |
|
| 11 | + |
|
| 12 | + public function handleText(&$token) { |
|
| 13 | + if (!$this->allowsElement('a')) return; |
|
| 14 | + |
|
| 15 | + if (strpos($token->data, '://') === false) { |
|
| 16 | + // our really quick heuristic failed, abort |
|
| 17 | + // this may not work so well if we want to match things like |
|
| 18 | + // "google.com", but then again, most people don't |
|
| 19 | + return; |
|
| 20 | + } |
|
| 21 | + |
|
| 22 | + // there is/are URL(s). Let's split the string: |
|
| 23 | + // Note: this regex is extremely permissive |
|
| 24 | + $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 25 | + |
|
| 26 | + $token = array(); |
|
| 27 | + |
|
| 28 | + // $i = index |
|
| 29 | + // $c = count |
|
| 30 | + // $l = is link |
|
| 31 | + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { |
|
| 32 | + if (!$l) { |
|
| 33 | + if ($bits[$i] === '') continue; |
|
| 34 | + $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
|
| 35 | + } else { |
|
| 36 | + $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); |
|
| 37 | + $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
|
| 38 | + $token[] = new HTMLPurifier_Token_End('a'); |
|
| 39 | + } |
|
| 40 | + } |
|
| 41 | + |
|
| 42 | + } |
|
| 43 | 43 | |
| 44 | 44 | } |
| 45 | 45 | |
@@ -10,7 +10,9 @@ discard block |
||
| 10 | 10 | public $needed = array('a' => array('href')); |
| 11 | 11 | |
| 12 | 12 | public function handleText(&$token) { |
| 13 | - if (!$this->allowsElement('a')) return; |
|
| 13 | + if (!$this->allowsElement('a')) { |
|
| 14 | + return; |
|
| 15 | + } |
|
| 14 | 16 | |
| 15 | 17 | if (strpos($token->data, '://') === false) { |
| 16 | 18 | // our really quick heuristic failed, abort |
@@ -30,7 +32,9 @@ discard block |
||
| 30 | 32 | // $l = is link |
| 31 | 33 | for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { |
| 32 | 34 | if (!$l) { |
| 33 | - if ($bits[$i] === '') continue; |
|
| 35 | + if ($bits[$i] === '') { |
|
| 36 | + continue; |
|
| 37 | + } |
|
| 34 | 38 | $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
| 35 | 39 | } else { |
| 36 | 40 | $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); |
@@ -7,38 +7,38 @@ |
||
| 7 | 7 | class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector |
| 8 | 8 | { |
| 9 | 9 | |
| 10 | - public $name = 'PurifierLinkify'; |
|
| 11 | - public $docURL; |
|
| 12 | - public $needed = array('a' => array('href')); |
|
| 13 | - |
|
| 14 | - public function prepare($config, $context) { |
|
| 15 | - $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL'); |
|
| 16 | - return parent::prepare($config, $context); |
|
| 17 | - } |
|
| 18 | - |
|
| 19 | - public function handleText(&$token) { |
|
| 20 | - if (!$this->allowsElement('a')) return; |
|
| 21 | - if (strpos($token->data, '%') === false) return; |
|
| 22 | - |
|
| 23 | - $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 24 | - $token = array(); |
|
| 25 | - |
|
| 26 | - // $i = index |
|
| 27 | - // $c = count |
|
| 28 | - // $l = is link |
|
| 29 | - for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { |
|
| 30 | - if (!$l) { |
|
| 31 | - if ($bits[$i] === '') continue; |
|
| 32 | - $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
|
| 33 | - } else { |
|
| 34 | - $token[] = new HTMLPurifier_Token_Start('a', |
|
| 35 | - array('href' => str_replace('%s', $bits[$i], $this->docURL))); |
|
| 36 | - $token[] = new HTMLPurifier_Token_Text('%' . $bits[$i]); |
|
| 37 | - $token[] = new HTMLPurifier_Token_End('a'); |
|
| 38 | - } |
|
| 39 | - } |
|
| 40 | - |
|
| 41 | - } |
|
| 10 | + public $name = 'PurifierLinkify'; |
|
| 11 | + public $docURL; |
|
| 12 | + public $needed = array('a' => array('href')); |
|
| 13 | + |
|
| 14 | + public function prepare($config, $context) { |
|
| 15 | + $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL'); |
|
| 16 | + return parent::prepare($config, $context); |
|
| 17 | + } |
|
| 18 | + |
|
| 19 | + public function handleText(&$token) { |
|
| 20 | + if (!$this->allowsElement('a')) return; |
|
| 21 | + if (strpos($token->data, '%') === false) return; |
|
| 22 | + |
|
| 23 | + $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 24 | + $token = array(); |
|
| 25 | + |
|
| 26 | + // $i = index |
|
| 27 | + // $c = count |
|
| 28 | + // $l = is link |
|
| 29 | + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { |
|
| 30 | + if (!$l) { |
|
| 31 | + if ($bits[$i] === '') continue; |
|
| 32 | + $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
|
| 33 | + } else { |
|
| 34 | + $token[] = new HTMLPurifier_Token_Start('a', |
|
| 35 | + array('href' => str_replace('%s', $bits[$i], $this->docURL))); |
|
| 36 | + $token[] = new HTMLPurifier_Token_Text('%' . $bits[$i]); |
|
| 37 | + $token[] = new HTMLPurifier_Token_End('a'); |
|
| 38 | + } |
|
| 39 | + } |
|
| 40 | + |
|
| 41 | + } |
|
| 42 | 42 | |
| 43 | 43 | } |
| 44 | 44 | |
@@ -17,8 +17,12 @@ discard block |
||
| 17 | 17 | } |
| 18 | 18 | |
| 19 | 19 | public function handleText(&$token) { |
| 20 | - if (!$this->allowsElement('a')) return; |
|
| 21 | - if (strpos($token->data, '%') === false) return; |
|
| 20 | + if (!$this->allowsElement('a')) { |
|
| 21 | + return; |
|
| 22 | + } |
|
| 23 | + if (strpos($token->data, '%') === false) { |
|
| 24 | + return; |
|
| 25 | + } |
|
| 22 | 26 | |
| 23 | 27 | $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); |
| 24 | 28 | $token = array(); |
@@ -28,7 +32,9 @@ discard block |
||
| 28 | 32 | // $l = is link |
| 29 | 33 | for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { |
| 30 | 34 | if (!$l) { |
| 31 | - if ($bits[$i] === '') continue; |
|
| 35 | + if ($bits[$i] === '') { |
|
| 36 | + continue; |
|
| 37 | + } |
|
| 32 | 38 | $token[] = new HTMLPurifier_Token_Text($bits[$i]); |
| 33 | 39 | } else { |
| 34 | 40 | $token[] = new HTMLPurifier_Token_Start('a', |
@@ -33,7 +33,7 @@ |
||
| 33 | 33 | } else { |
| 34 | 34 | $token[] = new HTMLPurifier_Token_Start('a', |
| 35 | 35 | array('href' => str_replace('%s', $bits[$i], $this->docURL))); |
| 36 | - $token[] = new HTMLPurifier_Token_Text('%' . $bits[$i]); |
|
| 36 | + $token[] = new HTMLPurifier_Token_Text('%'.$bits[$i]); |
|
| 37 | 37 | $token[] = new HTMLPurifier_Token_End('a'); |
| 38 | 38 | } |
| 39 | 39 | } |
@@ -3,48 +3,48 @@ |
||
| 3 | 3 | class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector |
| 4 | 4 | { |
| 5 | 5 | |
| 6 | - private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; |
|
| 6 | + private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; |
|
| 7 | 7 | |
| 8 | - public function prepare($config, $context) { |
|
| 9 | - parent::prepare($config, $context); |
|
| 10 | - $this->config = $config; |
|
| 11 | - $this->context = $context; |
|
| 12 | - $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); |
|
| 13 | - $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); |
|
| 14 | - $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 15 | - } |
|
| 8 | + public function prepare($config, $context) { |
|
| 9 | + parent::prepare($config, $context); |
|
| 10 | + $this->config = $config; |
|
| 11 | + $this->context = $context; |
|
| 12 | + $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); |
|
| 13 | + $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); |
|
| 14 | + $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 15 | + } |
|
| 16 | 16 | |
| 17 | - public function handleElement(&$token) { |
|
| 18 | - if (!$token instanceof HTMLPurifier_Token_Start) return; |
|
| 19 | - $next = false; |
|
| 20 | - for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { |
|
| 21 | - $next = $this->inputTokens[$i]; |
|
| 22 | - if ($next instanceof HTMLPurifier_Token_Text) { |
|
| 23 | - if ($next->is_whitespace) continue; |
|
| 24 | - if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { |
|
| 25 | - $plain = str_replace("\xC2\xA0", "", $next->data); |
|
| 26 | - $isWsOrNbsp = $plain === '' || ctype_space($plain); |
|
| 27 | - if ($isWsOrNbsp) continue; |
|
| 28 | - } |
|
| 29 | - } |
|
| 30 | - break; |
|
| 31 | - } |
|
| 32 | - if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { |
|
| 33 | - if ($token->name == 'colgroup') return; |
|
| 34 | - $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | - $token->armor['ValidateAttributes'] = true; |
|
| 36 | - if (isset($token->attr['id']) || isset($token->attr['name'])) return; |
|
| 37 | - $token = $i - $this->inputIndex + 1; |
|
| 38 | - for ($b = $this->inputIndex - 1; $b > 0; $b--) { |
|
| 39 | - $prev = $this->inputTokens[$b]; |
|
| 40 | - if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; |
|
| 41 | - break; |
|
| 42 | - } |
|
| 43 | - // This is safe because we removed the token that triggered this. |
|
| 44 | - $this->rewind($b - 1); |
|
| 45 | - return; |
|
| 46 | - } |
|
| 47 | - } |
|
| 17 | + public function handleElement(&$token) { |
|
| 18 | + if (!$token instanceof HTMLPurifier_Token_Start) return; |
|
| 19 | + $next = false; |
|
| 20 | + for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { |
|
| 21 | + $next = $this->inputTokens[$i]; |
|
| 22 | + if ($next instanceof HTMLPurifier_Token_Text) { |
|
| 23 | + if ($next->is_whitespace) continue; |
|
| 24 | + if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { |
|
| 25 | + $plain = str_replace("\xC2\xA0", "", $next->data); |
|
| 26 | + $isWsOrNbsp = $plain === '' || ctype_space($plain); |
|
| 27 | + if ($isWsOrNbsp) continue; |
|
| 28 | + } |
|
| 29 | + } |
|
| 30 | + break; |
|
| 31 | + } |
|
| 32 | + if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { |
|
| 33 | + if ($token->name == 'colgroup') return; |
|
| 34 | + $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | + $token->armor['ValidateAttributes'] = true; |
|
| 36 | + if (isset($token->attr['id']) || isset($token->attr['name'])) return; |
|
| 37 | + $token = $i - $this->inputIndex + 1; |
|
| 38 | + for ($b = $this->inputIndex - 1; $b > 0; $b--) { |
|
| 39 | + $prev = $this->inputTokens[$b]; |
|
| 40 | + if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; |
|
| 41 | + break; |
|
| 42 | + } |
|
| 43 | + // This is safe because we removed the token that triggered this. |
|
| 44 | + $this->rewind($b - 1); |
|
| 45 | + return; |
|
| 46 | + } |
|
| 47 | + } |
|
| 48 | 48 | |
| 49 | 49 | } |
| 50 | 50 | |
@@ -15,29 +15,41 @@ |
||
| 15 | 15 | } |
| 16 | 16 | |
| 17 | 17 | public function handleElement(&$token) { |
| 18 | - if (!$token instanceof HTMLPurifier_Token_Start) return; |
|
| 18 | + if (!$token instanceof HTMLPurifier_Token_Start) { |
|
| 19 | + return; |
|
| 20 | + } |
|
| 19 | 21 | $next = false; |
| 20 | 22 | for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { |
| 21 | 23 | $next = $this->inputTokens[$i]; |
| 22 | 24 | if ($next instanceof HTMLPurifier_Token_Text) { |
| 23 | - if ($next->is_whitespace) continue; |
|
| 25 | + if ($next->is_whitespace) { |
|
| 26 | + continue; |
|
| 27 | + } |
|
| 24 | 28 | if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { |
| 25 | 29 | $plain = str_replace("\xC2\xA0", "", $next->data); |
| 26 | 30 | $isWsOrNbsp = $plain === '' || ctype_space($plain); |
| 27 | - if ($isWsOrNbsp) continue; |
|
| 31 | + if ($isWsOrNbsp) { |
|
| 32 | + continue; |
|
| 33 | + } |
|
| 28 | 34 | } |
| 29 | 35 | } |
| 30 | 36 | break; |
| 31 | 37 | } |
| 32 | 38 | if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { |
| 33 | - if ($token->name == 'colgroup') return; |
|
| 39 | + if ($token->name == 'colgroup') { |
|
| 40 | + return; |
|
| 41 | + } |
|
| 34 | 42 | $this->attrValidator->validateToken($token, $this->config, $this->context); |
| 35 | 43 | $token->armor['ValidateAttributes'] = true; |
| 36 | - if (isset($token->attr['id']) || isset($token->attr['name'])) return; |
|
| 44 | + if (isset($token->attr['id']) || isset($token->attr['name'])) { |
|
| 45 | + return; |
|
| 46 | + } |
|
| 37 | 47 | $token = $i - $this->inputIndex + 1; |
| 38 | 48 | for ($b = $this->inputIndex - 1; $b > 0; $b--) { |
| 39 | 49 | $prev = $this->inputTokens[$b]; |
| 40 | - if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; |
|
| 50 | + if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) { |
|
| 51 | + continue; |
|
| 52 | + } |
|
| 41 | 53 | break; |
| 42 | 54 | } |
| 43 | 55 | // This is safe because we removed the token that triggered this. |
@@ -5,56 +5,56 @@ |
||
| 5 | 5 | */ |
| 6 | 6 | class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector |
| 7 | 7 | { |
| 8 | - public $name = 'RemoveSpansWithoutAttributes'; |
|
| 9 | - public $needed = array('span'); |
|
| 10 | - |
|
| 11 | - private $attrValidator; |
|
| 12 | - |
|
| 13 | - /** |
|
| 14 | - * Used by AttrValidator |
|
| 15 | - */ |
|
| 16 | - private $config; |
|
| 17 | - private $context; |
|
| 18 | - |
|
| 19 | - public function prepare($config, $context) { |
|
| 20 | - $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 21 | - $this->config = $config; |
|
| 22 | - $this->context = $context; |
|
| 23 | - return parent::prepare($config, $context); |
|
| 24 | - } |
|
| 25 | - |
|
| 26 | - public function handleElement(&$token) { |
|
| 27 | - if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { |
|
| 28 | - return; |
|
| 29 | - } |
|
| 30 | - |
|
| 31 | - // We need to validate the attributes now since this doesn't normally |
|
| 32 | - // happen until after MakeWellFormed. If all the attributes are removed |
|
| 33 | - // the span needs to be removed too. |
|
| 34 | - $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | - $token->armor['ValidateAttributes'] = true; |
|
| 36 | - |
|
| 37 | - if (!empty($token->attr)) { |
|
| 38 | - return; |
|
| 39 | - } |
|
| 40 | - |
|
| 41 | - $nesting = 0; |
|
| 42 | - $spanContentTokens = array(); |
|
| 43 | - while ($this->forwardUntilEndToken($i, $current, $nesting)) {} |
|
| 44 | - |
|
| 45 | - if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { |
|
| 46 | - // Mark closing span tag for deletion |
|
| 47 | - $current->markForDeletion = true; |
|
| 48 | - // Delete open span tag |
|
| 49 | - $token = false; |
|
| 50 | - } |
|
| 51 | - } |
|
| 52 | - |
|
| 53 | - public function handleEnd(&$token) { |
|
| 54 | - if ($token->markForDeletion) { |
|
| 55 | - $token = false; |
|
| 56 | - } |
|
| 57 | - } |
|
| 8 | + public $name = 'RemoveSpansWithoutAttributes'; |
|
| 9 | + public $needed = array('span'); |
|
| 10 | + |
|
| 11 | + private $attrValidator; |
|
| 12 | + |
|
| 13 | + /** |
|
| 14 | + * Used by AttrValidator |
|
| 15 | + */ |
|
| 16 | + private $config; |
|
| 17 | + private $context; |
|
| 18 | + |
|
| 19 | + public function prepare($config, $context) { |
|
| 20 | + $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 21 | + $this->config = $config; |
|
| 22 | + $this->context = $context; |
|
| 23 | + return parent::prepare($config, $context); |
|
| 24 | + } |
|
| 25 | + |
|
| 26 | + public function handleElement(&$token) { |
|
| 27 | + if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { |
|
| 28 | + return; |
|
| 29 | + } |
|
| 30 | + |
|
| 31 | + // We need to validate the attributes now since this doesn't normally |
|
| 32 | + // happen until after MakeWellFormed. If all the attributes are removed |
|
| 33 | + // the span needs to be removed too. |
|
| 34 | + $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | + $token->armor['ValidateAttributes'] = true; |
|
| 36 | + |
|
| 37 | + if (!empty($token->attr)) { |
|
| 38 | + return; |
|
| 39 | + } |
|
| 40 | + |
|
| 41 | + $nesting = 0; |
|
| 42 | + $spanContentTokens = array(); |
|
| 43 | + while ($this->forwardUntilEndToken($i, $current, $nesting)) {} |
|
| 44 | + |
|
| 45 | + if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { |
|
| 46 | + // Mark closing span tag for deletion |
|
| 47 | + $current->markForDeletion = true; |
|
| 48 | + // Delete open span tag |
|
| 49 | + $token = false; |
|
| 50 | + } |
|
| 51 | + } |
|
| 52 | + |
|
| 53 | + public function handleEnd(&$token) { |
|
| 54 | + if ($token->markForDeletion) { |
|
| 55 | + $token = false; |
|
| 56 | + } |
|
| 57 | + } |
|
| 58 | 58 | } |
| 59 | 59 | |
| 60 | 60 | // vim: et sw=4 sts=4 |
@@ -6,85 +6,85 @@ |
||
| 6 | 6 | */ |
| 7 | 7 | class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector |
| 8 | 8 | { |
| 9 | - public $name = 'SafeObject'; |
|
| 10 | - public $needed = array('object', 'param'); |
|
| 9 | + public $name = 'SafeObject'; |
|
| 10 | + public $needed = array('object', 'param'); |
|
| 11 | 11 | |
| 12 | - protected $objectStack = array(); |
|
| 13 | - protected $paramStack = array(); |
|
| 12 | + protected $objectStack = array(); |
|
| 13 | + protected $paramStack = array(); |
|
| 14 | 14 | |
| 15 | - // Keep this synchronized with AttrTransform/SafeParam.php |
|
| 16 | - protected $addParam = array( |
|
| 17 | - 'allowScriptAccess' => 'never', |
|
| 18 | - 'allowNetworking' => 'internal', |
|
| 19 | - ); |
|
| 20 | - protected $allowedParam = array( |
|
| 21 | - 'wmode' => true, |
|
| 22 | - 'movie' => true, |
|
| 23 | - 'flashvars' => true, |
|
| 24 | - 'src' => true, |
|
| 25 | - 'allowFullScreen' => true, // if omitted, assume to be 'false' |
|
| 26 | - ); |
|
| 15 | + // Keep this synchronized with AttrTransform/SafeParam.php |
|
| 16 | + protected $addParam = array( |
|
| 17 | + 'allowScriptAccess' => 'never', |
|
| 18 | + 'allowNetworking' => 'internal', |
|
| 19 | + ); |
|
| 20 | + protected $allowedParam = array( |
|
| 21 | + 'wmode' => true, |
|
| 22 | + 'movie' => true, |
|
| 23 | + 'flashvars' => true, |
|
| 24 | + 'src' => true, |
|
| 25 | + 'allowFullScreen' => true, // if omitted, assume to be 'false' |
|
| 26 | + ); |
|
| 27 | 27 | |
| 28 | - public function prepare($config, $context) { |
|
| 29 | - parent::prepare($config, $context); |
|
| 30 | - } |
|
| 28 | + public function prepare($config, $context) { |
|
| 29 | + parent::prepare($config, $context); |
|
| 30 | + } |
|
| 31 | 31 | |
| 32 | - public function handleElement(&$token) { |
|
| 33 | - if ($token->name == 'object') { |
|
| 34 | - $this->objectStack[] = $token; |
|
| 35 | - $this->paramStack[] = array(); |
|
| 36 | - $new = array($token); |
|
| 37 | - foreach ($this->addParam as $name => $value) { |
|
| 38 | - $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value)); |
|
| 39 | - } |
|
| 40 | - $token = $new; |
|
| 41 | - } elseif ($token->name == 'param') { |
|
| 42 | - $nest = count($this->currentNesting) - 1; |
|
| 43 | - if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') { |
|
| 44 | - $i = count($this->objectStack) - 1; |
|
| 45 | - if (!isset($token->attr['name'])) { |
|
| 46 | - $token = false; |
|
| 47 | - return; |
|
| 48 | - } |
|
| 49 | - $n = $token->attr['name']; |
|
| 50 | - // We need this fix because YouTube doesn't supply a data |
|
| 51 | - // attribute, which we need if a type is specified. This is |
|
| 52 | - // *very* Flash specific. |
|
| 53 | - if (!isset($this->objectStack[$i]->attr['data']) && |
|
| 54 | - ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { |
|
| 55 | - $this->objectStack[$i]->attr['data'] = $token->attr['value']; |
|
| 56 | - } |
|
| 57 | - // Check if the parameter is the correct value but has not |
|
| 58 | - // already been added |
|
| 59 | - if ( |
|
| 60 | - !isset($this->paramStack[$i][$n]) && |
|
| 61 | - isset($this->addParam[$n]) && |
|
| 62 | - $token->attr['name'] === $this->addParam[$n] |
|
| 63 | - ) { |
|
| 64 | - // keep token, and add to param stack |
|
| 65 | - $this->paramStack[$i][$n] = true; |
|
| 66 | - } elseif (isset($this->allowedParam[$n])) { |
|
| 67 | - // keep token, don't do anything to it |
|
| 68 | - // (could possibly check for duplicates here) |
|
| 69 | - } else { |
|
| 70 | - $token = false; |
|
| 71 | - } |
|
| 72 | - } else { |
|
| 73 | - // not directly inside an object, DENY! |
|
| 74 | - $token = false; |
|
| 75 | - } |
|
| 76 | - } |
|
| 77 | - } |
|
| 32 | + public function handleElement(&$token) { |
|
| 33 | + if ($token->name == 'object') { |
|
| 34 | + $this->objectStack[] = $token; |
|
| 35 | + $this->paramStack[] = array(); |
|
| 36 | + $new = array($token); |
|
| 37 | + foreach ($this->addParam as $name => $value) { |
|
| 38 | + $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value)); |
|
| 39 | + } |
|
| 40 | + $token = $new; |
|
| 41 | + } elseif ($token->name == 'param') { |
|
| 42 | + $nest = count($this->currentNesting) - 1; |
|
| 43 | + if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') { |
|
| 44 | + $i = count($this->objectStack) - 1; |
|
| 45 | + if (!isset($token->attr['name'])) { |
|
| 46 | + $token = false; |
|
| 47 | + return; |
|
| 48 | + } |
|
| 49 | + $n = $token->attr['name']; |
|
| 50 | + // We need this fix because YouTube doesn't supply a data |
|
| 51 | + // attribute, which we need if a type is specified. This is |
|
| 52 | + // *very* Flash specific. |
|
| 53 | + if (!isset($this->objectStack[$i]->attr['data']) && |
|
| 54 | + ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { |
|
| 55 | + $this->objectStack[$i]->attr['data'] = $token->attr['value']; |
|
| 56 | + } |
|
| 57 | + // Check if the parameter is the correct value but has not |
|
| 58 | + // already been added |
|
| 59 | + if ( |
|
| 60 | + !isset($this->paramStack[$i][$n]) && |
|
| 61 | + isset($this->addParam[$n]) && |
|
| 62 | + $token->attr['name'] === $this->addParam[$n] |
|
| 63 | + ) { |
|
| 64 | + // keep token, and add to param stack |
|
| 65 | + $this->paramStack[$i][$n] = true; |
|
| 66 | + } elseif (isset($this->allowedParam[$n])) { |
|
| 67 | + // keep token, don't do anything to it |
|
| 68 | + // (could possibly check for duplicates here) |
|
| 69 | + } else { |
|
| 70 | + $token = false; |
|
| 71 | + } |
|
| 72 | + } else { |
|
| 73 | + // not directly inside an object, DENY! |
|
| 74 | + $token = false; |
|
| 75 | + } |
|
| 76 | + } |
|
| 77 | + } |
|
| 78 | 78 | |
| 79 | - public function handleEnd(&$token) { |
|
| 80 | - // This is the WRONG way of handling the object and param stacks; |
|
| 81 | - // we should be inserting them directly on the relevant object tokens |
|
| 82 | - // so that the global stack handling handles it. |
|
| 83 | - if ($token->name == 'object') { |
|
| 84 | - array_pop($this->objectStack); |
|
| 85 | - array_pop($this->paramStack); |
|
| 86 | - } |
|
| 87 | - } |
|
| 79 | + public function handleEnd(&$token) { |
|
| 80 | + // This is the WRONG way of handling the object and param stacks; |
|
| 81 | + // we should be inserting them directly on the relevant object tokens |
|
| 82 | + // so that the global stack handling handles it. |
|
| 83 | + if ($token->name == 'object') { |
|
| 84 | + array_pop($this->objectStack); |
|
| 85 | + array_pop($this->paramStack); |
|
| 86 | + } |
|
| 87 | + } |
|
| 88 | 88 | |
| 89 | 89 | } |
| 90 | 90 | |