@@ -36,58 +36,58 @@ |
||
36 | 36 | */ |
37 | 37 | class HTTP_Request2_Exception extends PEAR_Exception |
38 | 38 | { |
39 | - /** An invalid argument was passed to a method */ |
|
40 | - const INVALID_ARGUMENT = 1; |
|
41 | - /** Some required value was not available */ |
|
42 | - const MISSING_VALUE = 2; |
|
43 | - /** Request cannot be processed due to errors in PHP configuration */ |
|
44 | - const MISCONFIGURATION = 3; |
|
45 | - /** Error reading the local file */ |
|
46 | - const READ_ERROR = 4; |
|
39 | + /** An invalid argument was passed to a method */ |
|
40 | + const INVALID_ARGUMENT = 1; |
|
41 | + /** Some required value was not available */ |
|
42 | + const MISSING_VALUE = 2; |
|
43 | + /** Request cannot be processed due to errors in PHP configuration */ |
|
44 | + const MISCONFIGURATION = 3; |
|
45 | + /** Error reading the local file */ |
|
46 | + const READ_ERROR = 4; |
|
47 | 47 | |
48 | - /** Server returned a response that does not conform to HTTP protocol */ |
|
49 | - const MALFORMED_RESPONSE = 10; |
|
50 | - /** Failure decoding Content-Encoding or Transfer-Encoding of response */ |
|
51 | - const DECODE_ERROR = 20; |
|
52 | - /** Operation timed out */ |
|
53 | - const TIMEOUT = 30; |
|
54 | - /** Number of redirects exceeded 'max_redirects' configuration parameter */ |
|
55 | - const TOO_MANY_REDIRECTS = 40; |
|
56 | - /** Redirect to a protocol other than http(s):// */ |
|
57 | - const NON_HTTP_REDIRECT = 50; |
|
48 | + /** Server returned a response that does not conform to HTTP protocol */ |
|
49 | + const MALFORMED_RESPONSE = 10; |
|
50 | + /** Failure decoding Content-Encoding or Transfer-Encoding of response */ |
|
51 | + const DECODE_ERROR = 20; |
|
52 | + /** Operation timed out */ |
|
53 | + const TIMEOUT = 30; |
|
54 | + /** Number of redirects exceeded 'max_redirects' configuration parameter */ |
|
55 | + const TOO_MANY_REDIRECTS = 40; |
|
56 | + /** Redirect to a protocol other than http(s):// */ |
|
57 | + const NON_HTTP_REDIRECT = 50; |
|
58 | 58 | |
59 | - /** |
|
60 | - * Native error code |
|
61 | - * @var int |
|
62 | - */ |
|
63 | - private $_nativeCode; |
|
59 | + /** |
|
60 | + * Native error code |
|
61 | + * @var int |
|
62 | + */ |
|
63 | + private $_nativeCode; |
|
64 | 64 | |
65 | - /** |
|
66 | - * Constructor, can set package error code and native error code |
|
67 | - * |
|
68 | - * @param string $message exception message |
|
69 | - * @param int $code package error code, one of class constants |
|
70 | - * @param int $nativeCode error code from underlying PHP extension |
|
71 | - */ |
|
72 | - public function __construct($message = null, $code = null, $nativeCode = null) |
|
73 | - { |
|
74 | - parent::__construct($message, $code); |
|
75 | - $this->_nativeCode = $nativeCode; |
|
76 | - } |
|
65 | + /** |
|
66 | + * Constructor, can set package error code and native error code |
|
67 | + * |
|
68 | + * @param string $message exception message |
|
69 | + * @param int $code package error code, one of class constants |
|
70 | + * @param int $nativeCode error code from underlying PHP extension |
|
71 | + */ |
|
72 | + public function __construct($message = null, $code = null, $nativeCode = null) |
|
73 | + { |
|
74 | + parent::__construct($message, $code); |
|
75 | + $this->_nativeCode = $nativeCode; |
|
76 | + } |
|
77 | 77 | |
78 | - /** |
|
79 | - * Returns error code produced by underlying PHP extension |
|
80 | - * |
|
81 | - * For Socket Adapter this may contain error number returned by |
|
82 | - * stream_socket_client(), for Curl Adapter this will contain error number |
|
83 | - * returned by curl_errno() |
|
84 | - * |
|
85 | - * @return integer |
|
86 | - */ |
|
87 | - public function getNativeCode() |
|
88 | - { |
|
89 | - return $this->_nativeCode; |
|
90 | - } |
|
78 | + /** |
|
79 | + * Returns error code produced by underlying PHP extension |
|
80 | + * |
|
81 | + * For Socket Adapter this may contain error number returned by |
|
82 | + * stream_socket_client(), for Curl Adapter this will contain error number |
|
83 | + * returned by curl_errno() |
|
84 | + * |
|
85 | + * @return integer |
|
86 | + */ |
|
87 | + public function getNativeCode() |
|
88 | + { |
|
89 | + return $this->_nativeCode; |
|
90 | + } |
|
91 | 91 | } |
92 | 92 | |
93 | 93 | /** |
@@ -1,13 +1,13 @@ |
||
1 | 1 | <?php |
2 | 2 | /* Copyright (C) NAVER <http://www.navercorp.com> */ |
3 | 3 | /** |
4 | - * importer |
|
5 | - * high class of importer module |
|
6 | - * |
|
7 | - * @author NAVER ([email protected]) |
|
8 | - * @package /modules/importer |
|
9 | - * @version 0.1 |
|
10 | - */ |
|
4 | + * importer |
|
5 | + * high class of importer module |
|
6 | + * |
|
7 | + * @author NAVER ([email protected]) |
|
8 | + * @package /modules/importer |
|
9 | + * @version 0.1 |
|
10 | + */ |
|
11 | 11 | class importer extends ModuleObject |
12 | 12 | { |
13 | 13 | /** |
@@ -1,10 +1,10 @@ |
||
1 | 1 | <?php |
2 | 2 | /* Copyright (C) NAVER <http://www.navercorp.com> */ |
3 | 3 | /** |
4 | - * @class install |
|
5 | - * @author NAVER ([email protected]) |
|
6 | - * @brief install module of the high class |
|
7 | - */ |
|
4 | + * @class install |
|
5 | + * @author NAVER ([email protected]) |
|
6 | + * @brief install module of the high class |
|
7 | + */ |
|
8 | 8 | class install extends ModuleObject |
9 | 9 | { |
10 | 10 | /** |
@@ -5,29 +5,29 @@ |
||
5 | 5 | */ |
6 | 6 | class PEAR5 |
7 | 7 | { |
8 | - /** |
|
9 | - * If you have a class that's mostly/entirely static, and you need static |
|
10 | - * properties, you can use this method to simulate them. Eg. in your method(s) |
|
11 | - * do this: $myVar = &PEAR5::getStaticProperty('myclass', 'myVar'); |
|
12 | - * You MUST use a reference, or they will not persist! |
|
13 | - * |
|
14 | - * @access public |
|
15 | - * @param string $class The calling classname, to prevent clashes |
|
16 | - * @param string $var The variable to retrieve. |
|
17 | - * @return mixed A reference to the variable. If not set it will be |
|
18 | - * auto initialised to NULL. |
|
19 | - */ |
|
20 | - static function &getStaticProperty($class, $var) |
|
21 | - { |
|
22 | - static $properties; |
|
23 | - if (!isset($properties[$class])) { |
|
24 | - $properties[$class] = array(); |
|
25 | - } |
|
8 | + /** |
|
9 | + * If you have a class that's mostly/entirely static, and you need static |
|
10 | + * properties, you can use this method to simulate them. Eg. in your method(s) |
|
11 | + * do this: $myVar = &PEAR5::getStaticProperty('myclass', 'myVar'); |
|
12 | + * You MUST use a reference, or they will not persist! |
|
13 | + * |
|
14 | + * @access public |
|
15 | + * @param string $class The calling classname, to prevent clashes |
|
16 | + * @param string $var The variable to retrieve. |
|
17 | + * @return mixed A reference to the variable. If not set it will be |
|
18 | + * auto initialised to NULL. |
|
19 | + */ |
|
20 | + static function &getStaticProperty($class, $var) |
|
21 | + { |
|
22 | + static $properties; |
|
23 | + if (!isset($properties[$class])) { |
|
24 | + $properties[$class] = array(); |
|
25 | + } |
|
26 | 26 | |
27 | - if (!array_key_exists($var, $properties[$class])) { |
|
28 | - $properties[$class][$var] = null; |
|
29 | - } |
|
27 | + if (!array_key_exists($var, $properties[$class])) { |
|
28 | + $properties[$class][$var] = null; |
|
29 | + } |
|
30 | 30 | |
31 | - return $properties[$class][$var]; |
|
32 | - } |
|
31 | + return $properties[$class][$var]; |
|
32 | + } |
|
33 | 33 | } |
34 | 34 | \ No newline at end of file |
@@ -22,7 +22,7 @@ discard block |
||
22 | 22 | |
23 | 23 | public function getInstance() |
24 | 24 | { |
25 | - if(!isset($GLOBALS['__PURIFIER_INSTANCE__'])) |
|
25 | + if (!isset($GLOBALS['__PURIFIER_INSTANCE__'])) |
|
26 | 26 | { |
27 | 27 | $GLOBALS['__PURIFIER_INSTANCE__'] = new Purifier(); |
28 | 28 | } |
@@ -52,9 +52,9 @@ discard block |
||
52 | 52 | { |
53 | 53 | // add attribute for edit component |
54 | 54 | $editComponentAttrs = $this->_searchEditComponent($content); |
55 | - if(is_array($editComponentAttrs)) |
|
55 | + if (is_array($editComponentAttrs)) |
|
56 | 56 | { |
57 | - foreach($editComponentAttrs AS $k => $v) |
|
57 | + foreach ($editComponentAttrs AS $k => $v) |
|
58 | 58 | { |
59 | 59 | $this->_def->addAttribute('img', $v, 'CDATA'); |
60 | 60 | $this->_def->addAttribute('div', $v, 'CDATA'); |
@@ -63,9 +63,9 @@ discard block |
||
63 | 63 | |
64 | 64 | // add attribute for widget component |
65 | 65 | $widgetAttrs = $this->_searchWidget($content); |
66 | - if(is_array($widgetAttrs)) |
|
66 | + if (is_array($widgetAttrs)) |
|
67 | 67 | { |
68 | - foreach($widgetAttrs AS $k => $v) |
|
68 | + foreach ($widgetAttrs AS $k => $v) |
|
69 | 69 | { |
70 | 70 | $this->_def->addAttribute('img', $v, 'CDATA'); |
71 | 71 | } |
@@ -82,19 +82,19 @@ discard block |
||
82 | 82 | preg_match_all('!<(?:(div)|img)([^>]*)editor_component=([^>]*)>(?(1)(.*?)</div>)!is', $content, $m); |
83 | 83 | |
84 | 84 | $attributeList = array(); |
85 | - if(is_array($m[2])) |
|
85 | + if (is_array($m[2])) |
|
86 | 86 | { |
87 | - foreach($m[2] as $key => $value) |
|
87 | + foreach ($m[2] as $key => $value) |
|
88 | 88 | { |
89 | 89 | unset($script, $m2); |
90 | 90 | $script = " {$m[2][$key]} editor_component={$m[3][$key]}"; |
91 | 91 | |
92 | - if(preg_match_all('/([a-z0-9_-]+)="([^"]+)"/is', $script, $m2)) |
|
92 | + if (preg_match_all('/([a-z0-9_-]+)="([^"]+)"/is', $script, $m2)) |
|
93 | 93 | { |
94 | - foreach($m2[1] as $value2) |
|
94 | + foreach ($m2[1] as $value2) |
|
95 | 95 | { |
96 | 96 | //SECISSUE check style attr |
97 | - if($value2 == 'style') |
|
97 | + if ($value2 == 'style') |
|
98 | 98 | { |
99 | 99 | continue; |
100 | 100 | } |
@@ -117,18 +117,18 @@ discard block |
||
117 | 117 | preg_match_all('!<(?:(div)|img)([^>]*)class="zbxe_widget_output"([^>]*)>(?(1)(.*?)</div>)!is', $content, $m); |
118 | 118 | |
119 | 119 | $attributeList = array(); |
120 | - if(is_array($m[3])) |
|
120 | + if (is_array($m[3])) |
|
121 | 121 | { |
122 | 122 | $content = str_replace('<img class="zbxe_widget_output"', '<img src="" class="zbxe_widget_output"', $content); |
123 | 123 | |
124 | - foreach($m[3] as $key => $value) |
|
124 | + foreach ($m[3] as $key => $value) |
|
125 | 125 | { |
126 | 126 | if (preg_match_all('/([a-z0-9_-]+)="([^"]+)"/is', $m[3][$key], $m2)) |
127 | 127 | { |
128 | - foreach($m2[1] as $value2) |
|
128 | + foreach ($m2[1] as $value2) |
|
129 | 129 | { |
130 | 130 | //SECISSUE check style attr |
131 | - if($value2 == 'style') |
|
131 | + if ($value2 == 'style') |
|
132 | 132 | { |
133 | 133 | continue; |
134 | 134 | } |
@@ -146,7 +146,7 @@ discard block |
||
146 | 146 | $whiteIframeUrlList = $oEmbedFilter->getWhiteIframeUrlList(); |
147 | 147 | |
148 | 148 | $whiteDomain = array(); |
149 | - foreach($whiteIframeUrlList as $value) |
|
149 | + foreach ($whiteIframeUrlList as $value) |
|
150 | 150 | { |
151 | 151 | $whiteDomain[] = preg_quote($value, '%'); |
152 | 152 | } |
@@ -18,197 +18,197 @@ |
||
18 | 18 | */ |
19 | 19 | class Html2Text { |
20 | 20 | |
21 | - // Private fields |
|
21 | + // Private fields |
|
22 | 22 | |
23 | - var $iCurrentLine = ""; |
|
24 | - var $iCurrentWord = ""; |
|
25 | - var $iCurrentWordArray; |
|
26 | - var $iCurrentWordIndex; |
|
27 | - var $iInScript; |
|
28 | - var $iListLevel = 0; |
|
29 | - var $iHtmlText; |
|
30 | - var $iMaxColumns; |
|
31 | - var $iHtmlParser; |
|
23 | + var $iCurrentLine = ""; |
|
24 | + var $iCurrentWord = ""; |
|
25 | + var $iCurrentWordArray; |
|
26 | + var $iCurrentWordIndex; |
|
27 | + var $iInScript; |
|
28 | + var $iListLevel = 0; |
|
29 | + var $iHtmlText; |
|
30 | + var $iMaxColumns; |
|
31 | + var $iHtmlParser; |
|
32 | 32 | |
33 | - // Constants |
|
33 | + // Constants |
|
34 | 34 | |
35 | - var $TOKEN_BR = 0; |
|
36 | - var $TOKEN_P = 1; |
|
37 | - var $TOKEN_LI = 2; |
|
38 | - var $TOKEN_AFTERLI = 3; |
|
39 | - var $TOKEN_UL = 4; |
|
40 | - var $TOKEN_ENDUL = 5; |
|
35 | + var $TOKEN_BR = 0; |
|
36 | + var $TOKEN_P = 1; |
|
37 | + var $TOKEN_LI = 2; |
|
38 | + var $TOKEN_AFTERLI = 3; |
|
39 | + var $TOKEN_UL = 4; |
|
40 | + var $TOKEN_ENDUL = 5; |
|
41 | 41 | |
42 | - function Html2Text ($aHtmlText, $aMaxColumns) { |
|
43 | - $this->iHtmlText = $aHtmlText; |
|
44 | - $this->iMaxColumns = $aMaxColumns; |
|
45 | - } |
|
42 | + function Html2Text ($aHtmlText, $aMaxColumns) { |
|
43 | + $this->iHtmlText = $aHtmlText; |
|
44 | + $this->iMaxColumns = $aMaxColumns; |
|
45 | + } |
|
46 | 46 | |
47 | - function convert() { |
|
48 | - $this->iHtmlParser = new HtmlParser($this->iHtmlText); |
|
49 | - $wholeText = ""; |
|
50 | - while (($line = $this->getLine()) !== false) { |
|
51 | - $wholeText .= ($line . "\r\n"); |
|
52 | - } |
|
53 | - return $wholeText; |
|
54 | - } |
|
47 | + function convert() { |
|
48 | + $this->iHtmlParser = new HtmlParser($this->iHtmlText); |
|
49 | + $wholeText = ""; |
|
50 | + while (($line = $this->getLine()) !== false) { |
|
51 | + $wholeText .= ($line . "\r\n"); |
|
52 | + } |
|
53 | + return $wholeText; |
|
54 | + } |
|
55 | 55 | |
56 | - function getLine() { |
|
57 | - while (true) { |
|
58 | - if (!$this->addWordToLine($this->iCurrentWord)) { |
|
59 | - $retvalue = $this->iCurrentLine; |
|
60 | - $this->iCurrentLine = ""; |
|
61 | - return $retvalue; |
|
62 | - } |
|
63 | - $word = $this->getWord(); |
|
64 | - if ($word === false) { |
|
65 | - if ($this->iCurrentLine == "") { |
|
66 | - break; |
|
67 | - } |
|
68 | - $retvalue = $this->iCurrentLine; |
|
69 | - $this->iCurrentLine = ""; |
|
70 | - $this->iInText = false; |
|
71 | - $this->iCurrentWord = ""; |
|
72 | - return $retvalue; |
|
73 | - } |
|
74 | - } |
|
75 | - return false; |
|
76 | - } |
|
56 | + function getLine() { |
|
57 | + while (true) { |
|
58 | + if (!$this->addWordToLine($this->iCurrentWord)) { |
|
59 | + $retvalue = $this->iCurrentLine; |
|
60 | + $this->iCurrentLine = ""; |
|
61 | + return $retvalue; |
|
62 | + } |
|
63 | + $word = $this->getWord(); |
|
64 | + if ($word === false) { |
|
65 | + if ($this->iCurrentLine == "") { |
|
66 | + break; |
|
67 | + } |
|
68 | + $retvalue = $this->iCurrentLine; |
|
69 | + $this->iCurrentLine = ""; |
|
70 | + $this->iInText = false; |
|
71 | + $this->iCurrentWord = ""; |
|
72 | + return $retvalue; |
|
73 | + } |
|
74 | + } |
|
75 | + return false; |
|
76 | + } |
|
77 | 77 | |
78 | - function addWordToLine ($word) { |
|
79 | - if ($this->iInScript) { |
|
80 | - return true; |
|
81 | - } |
|
82 | - $prevLine = $this->iCurrentLine; |
|
83 | - if ($word === $this->TOKEN_BR) { |
|
84 | - $this->iCurrentWord = ""; |
|
85 | - return false; |
|
86 | - } |
|
87 | - if ($word === $this->TOKEN_P) { |
|
88 | - $this->iCurrentWord = $this->TOKEN_BR; |
|
89 | - return false; |
|
90 | - } |
|
91 | - if ($word === $this->TOKEN_UL) { |
|
92 | - $this->iCurrentWord = $this->TOKEN_BR; |
|
93 | - return false; |
|
94 | - } |
|
95 | - if ($word === $this->TOKEN_ENDUL) { |
|
96 | - $this->iCurrentWord = $this->TOKEN_BR; |
|
97 | - return false; |
|
98 | - } |
|
99 | - if ($word === $this->TOKEN_LI) { |
|
100 | - $this->iCurrentWord = $this->TOKEN_AFTERLI; |
|
101 | - return false; |
|
102 | - } |
|
103 | - $toAdd = $word; |
|
104 | - if ($word === $this->TOKEN_AFTERLI) { |
|
105 | - $toAdd = ""; |
|
106 | - } |
|
107 | - if ($prevLine != "") { |
|
108 | - $prevLine .= " "; |
|
109 | - } |
|
110 | - else { |
|
111 | - $prevLine = $this->getIndentation($word === $this->TOKEN_AFTERLI); |
|
112 | - } |
|
113 | - $candidateLine = $prevLine . $toAdd; |
|
114 | - if (strlen ($candidateLine) > $this->iMaxColumns && $prevLine != "") { |
|
115 | - return false; |
|
116 | - } |
|
117 | - $this->iCurrentLine = $candidateLine; |
|
118 | - return true; |
|
119 | - } |
|
78 | + function addWordToLine ($word) { |
|
79 | + if ($this->iInScript) { |
|
80 | + return true; |
|
81 | + } |
|
82 | + $prevLine = $this->iCurrentLine; |
|
83 | + if ($word === $this->TOKEN_BR) { |
|
84 | + $this->iCurrentWord = ""; |
|
85 | + return false; |
|
86 | + } |
|
87 | + if ($word === $this->TOKEN_P) { |
|
88 | + $this->iCurrentWord = $this->TOKEN_BR; |
|
89 | + return false; |
|
90 | + } |
|
91 | + if ($word === $this->TOKEN_UL) { |
|
92 | + $this->iCurrentWord = $this->TOKEN_BR; |
|
93 | + return false; |
|
94 | + } |
|
95 | + if ($word === $this->TOKEN_ENDUL) { |
|
96 | + $this->iCurrentWord = $this->TOKEN_BR; |
|
97 | + return false; |
|
98 | + } |
|
99 | + if ($word === $this->TOKEN_LI) { |
|
100 | + $this->iCurrentWord = $this->TOKEN_AFTERLI; |
|
101 | + return false; |
|
102 | + } |
|
103 | + $toAdd = $word; |
|
104 | + if ($word === $this->TOKEN_AFTERLI) { |
|
105 | + $toAdd = ""; |
|
106 | + } |
|
107 | + if ($prevLine != "") { |
|
108 | + $prevLine .= " "; |
|
109 | + } |
|
110 | + else { |
|
111 | + $prevLine = $this->getIndentation($word === $this->TOKEN_AFTERLI); |
|
112 | + } |
|
113 | + $candidateLine = $prevLine . $toAdd; |
|
114 | + if (strlen ($candidateLine) > $this->iMaxColumns && $prevLine != "") { |
|
115 | + return false; |
|
116 | + } |
|
117 | + $this->iCurrentLine = $candidateLine; |
|
118 | + return true; |
|
119 | + } |
|
120 | 120 | |
121 | - function getWord() { |
|
122 | - while (true) { |
|
123 | - if ($this->iHtmlParser->iNodeType == NODE_TYPE_TEXT) { |
|
124 | - if (!$this->iInText) { |
|
125 | - $words = $this->splitWords($this->iHtmlParser->iNodeValue); |
|
126 | - $this->iCurrentWordArray = $words; |
|
127 | - $this->iCurrentWordIndex = 0; |
|
128 | - $this->iInText = true; |
|
129 | - } |
|
130 | - if ($this->iCurrentWordIndex < count($this->iCurrentWordArray)) { |
|
131 | - $this->iCurrentWord = $this->iCurrentWordArray[$this->iCurrentWordIndex++]; |
|
132 | - return $this->iCurrentWord; |
|
133 | - } |
|
134 | - else { |
|
135 | - $this->iInText = false; |
|
136 | - } |
|
137 | - } |
|
138 | - else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) { |
|
139 | - if (strcasecmp ($this->iHtmlParser->iNodeName, "br") == 0) { |
|
140 | - $this->iHtmlParser->parse(); |
|
141 | - $this->iCurrentWord = $this->TOKEN_BR; |
|
142 | - return $this->iCurrentWord; |
|
143 | - } |
|
144 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "p") == 0) { |
|
145 | - $this->iHtmlParser->parse(); |
|
146 | - $this->iCurrentWord = $this->TOKEN_P; |
|
147 | - return $this->iCurrentWord; |
|
148 | - } |
|
149 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
150 | - $this->iHtmlParser->parse(); |
|
151 | - $this->iCurrentWord = ""; |
|
152 | - $this->iInScript = true; |
|
153 | - return $this->iCurrentWord; |
|
154 | - } |
|
155 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
156 | - $this->iHtmlParser->parse(); |
|
157 | - $this->iCurrentWord = $this->TOKEN_UL; |
|
158 | - $this->iListLevel++; |
|
159 | - return $this->iCurrentWord; |
|
160 | - } |
|
161 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "li") == 0) { |
|
162 | - $this->iHtmlParser->parse(); |
|
163 | - $this->iCurrentWord = $this->TOKEN_LI; |
|
164 | - return $this->iCurrentWord; |
|
165 | - } |
|
166 | - } |
|
167 | - else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) { |
|
168 | - if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
169 | - $this->iHtmlParser->parse(); |
|
170 | - $this->iCurrentWord = ""; |
|
171 | - $this->iInScript = false; |
|
172 | - return $this->iCurrentWord; |
|
173 | - } |
|
174 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
175 | - $this->iHtmlParser->parse(); |
|
176 | - $this->iCurrentWord = $this->TOKEN_ENDUL; |
|
177 | - if ($this->iListLevel > 0) { |
|
178 | - $this->iListLevel--; |
|
179 | - } |
|
180 | - return $this->iCurrentWord; |
|
181 | - } |
|
182 | - } |
|
183 | - if (!$this->iHtmlParser->parse()) { |
|
184 | - break; |
|
185 | - } |
|
186 | - } |
|
187 | - return false; |
|
188 | - } |
|
121 | + function getWord() { |
|
122 | + while (true) { |
|
123 | + if ($this->iHtmlParser->iNodeType == NODE_TYPE_TEXT) { |
|
124 | + if (!$this->iInText) { |
|
125 | + $words = $this->splitWords($this->iHtmlParser->iNodeValue); |
|
126 | + $this->iCurrentWordArray = $words; |
|
127 | + $this->iCurrentWordIndex = 0; |
|
128 | + $this->iInText = true; |
|
129 | + } |
|
130 | + if ($this->iCurrentWordIndex < count($this->iCurrentWordArray)) { |
|
131 | + $this->iCurrentWord = $this->iCurrentWordArray[$this->iCurrentWordIndex++]; |
|
132 | + return $this->iCurrentWord; |
|
133 | + } |
|
134 | + else { |
|
135 | + $this->iInText = false; |
|
136 | + } |
|
137 | + } |
|
138 | + else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) { |
|
139 | + if (strcasecmp ($this->iHtmlParser->iNodeName, "br") == 0) { |
|
140 | + $this->iHtmlParser->parse(); |
|
141 | + $this->iCurrentWord = $this->TOKEN_BR; |
|
142 | + return $this->iCurrentWord; |
|
143 | + } |
|
144 | + else if (strcasecmp ($this->iHtmlParser->iNodeName, "p") == 0) { |
|
145 | + $this->iHtmlParser->parse(); |
|
146 | + $this->iCurrentWord = $this->TOKEN_P; |
|
147 | + return $this->iCurrentWord; |
|
148 | + } |
|
149 | + else if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
150 | + $this->iHtmlParser->parse(); |
|
151 | + $this->iCurrentWord = ""; |
|
152 | + $this->iInScript = true; |
|
153 | + return $this->iCurrentWord; |
|
154 | + } |
|
155 | + else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
156 | + $this->iHtmlParser->parse(); |
|
157 | + $this->iCurrentWord = $this->TOKEN_UL; |
|
158 | + $this->iListLevel++; |
|
159 | + return $this->iCurrentWord; |
|
160 | + } |
|
161 | + else if (strcasecmp ($this->iHtmlParser->iNodeName, "li") == 0) { |
|
162 | + $this->iHtmlParser->parse(); |
|
163 | + $this->iCurrentWord = $this->TOKEN_LI; |
|
164 | + return $this->iCurrentWord; |
|
165 | + } |
|
166 | + } |
|
167 | + else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) { |
|
168 | + if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
169 | + $this->iHtmlParser->parse(); |
|
170 | + $this->iCurrentWord = ""; |
|
171 | + $this->iInScript = false; |
|
172 | + return $this->iCurrentWord; |
|
173 | + } |
|
174 | + else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
175 | + $this->iHtmlParser->parse(); |
|
176 | + $this->iCurrentWord = $this->TOKEN_ENDUL; |
|
177 | + if ($this->iListLevel > 0) { |
|
178 | + $this->iListLevel--; |
|
179 | + } |
|
180 | + return $this->iCurrentWord; |
|
181 | + } |
|
182 | + } |
|
183 | + if (!$this->iHtmlParser->parse()) { |
|
184 | + break; |
|
185 | + } |
|
186 | + } |
|
187 | + return false; |
|
188 | + } |
|
189 | 189 | |
190 | - function splitWords ($text) { |
|
191 | - $words = split ("[ \t\r\n]+", $text); |
|
192 | - for ($idx = 0; $idx < count($words); $idx++) { |
|
193 | - $words[$idx] = $this->htmlDecode($words[$idx]); |
|
194 | - } |
|
195 | - return $words; |
|
196 | - } |
|
190 | + function splitWords ($text) { |
|
191 | + $words = split ("[ \t\r\n]+", $text); |
|
192 | + for ($idx = 0; $idx < count($words); $idx++) { |
|
193 | + $words[$idx] = $this->htmlDecode($words[$idx]); |
|
194 | + } |
|
195 | + return $words; |
|
196 | + } |
|
197 | 197 | |
198 | - function htmlDecode ($text) { |
|
199 | - // TBD |
|
200 | - return $text; |
|
201 | - } |
|
198 | + function htmlDecode ($text) { |
|
199 | + // TBD |
|
200 | + return $text; |
|
201 | + } |
|
202 | 202 | |
203 | - function getIndentation ($hasLI) { |
|
204 | - $indent = ""; |
|
205 | - $idx = 0; |
|
206 | - for ($idx = 0; $idx < ($this->iListLevel - 1); $idx++) { |
|
207 | - $indent .= " "; |
|
208 | - } |
|
209 | - if ($this->iListLevel > 0) { |
|
210 | - $indent = $hasLI ? ($indent . "- ") : ($indent . " "); |
|
211 | - } |
|
212 | - return $indent; |
|
213 | - } |
|
203 | + function getIndentation ($hasLI) { |
|
204 | + $indent = ""; |
|
205 | + $idx = 0; |
|
206 | + for ($idx = 0; $idx < ($this->iListLevel - 1); $idx++) { |
|
207 | + $indent .= " "; |
|
208 | + } |
|
209 | + if ($this->iListLevel > 0) { |
|
210 | + $indent = $hasLI ? ($indent . "- ") : ($indent . " "); |
|
211 | + } |
|
212 | + return $indent; |
|
213 | + } |
|
214 | 214 | } |
@@ -39,7 +39,7 @@ discard block |
||
39 | 39 | var $TOKEN_UL = 4; |
40 | 40 | var $TOKEN_ENDUL = 5; |
41 | 41 | |
42 | - function Html2Text ($aHtmlText, $aMaxColumns) { |
|
42 | + function Html2Text($aHtmlText, $aMaxColumns) { |
|
43 | 43 | $this->iHtmlText = $aHtmlText; |
44 | 44 | $this->iMaxColumns = $aMaxColumns; |
45 | 45 | } |
@@ -75,7 +75,7 @@ discard block |
||
75 | 75 | return false; |
76 | 76 | } |
77 | 77 | |
78 | - function addWordToLine ($word) { |
|
78 | + function addWordToLine($word) { |
|
79 | 79 | if ($this->iInScript) { |
80 | 80 | return true; |
81 | 81 | } |
@@ -111,7 +111,7 @@ discard block |
||
111 | 111 | $prevLine = $this->getIndentation($word === $this->TOKEN_AFTERLI); |
112 | 112 | } |
113 | 113 | $candidateLine = $prevLine . $toAdd; |
114 | - if (strlen ($candidateLine) > $this->iMaxColumns && $prevLine != "") { |
|
114 | + if (strlen($candidateLine) > $this->iMaxColumns && $prevLine != "") { |
|
115 | 115 | return false; |
116 | 116 | } |
117 | 117 | $this->iCurrentLine = $candidateLine; |
@@ -136,42 +136,42 @@ discard block |
||
136 | 136 | } |
137 | 137 | } |
138 | 138 | else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) { |
139 | - if (strcasecmp ($this->iHtmlParser->iNodeName, "br") == 0) { |
|
139 | + if (strcasecmp($this->iHtmlParser->iNodeName, "br") == 0) { |
|
140 | 140 | $this->iHtmlParser->parse(); |
141 | 141 | $this->iCurrentWord = $this->TOKEN_BR; |
142 | 142 | return $this->iCurrentWord; |
143 | 143 | } |
144 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "p") == 0) { |
|
144 | + else if (strcasecmp($this->iHtmlParser->iNodeName, "p") == 0) { |
|
145 | 145 | $this->iHtmlParser->parse(); |
146 | 146 | $this->iCurrentWord = $this->TOKEN_P; |
147 | 147 | return $this->iCurrentWord; |
148 | 148 | } |
149 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
149 | + else if (strcasecmp($this->iHtmlParser->iNodeName, "script") == 0) { |
|
150 | 150 | $this->iHtmlParser->parse(); |
151 | 151 | $this->iCurrentWord = ""; |
152 | 152 | $this->iInScript = true; |
153 | 153 | return $this->iCurrentWord; |
154 | 154 | } |
155 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
155 | + else if (strcasecmp($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
156 | 156 | $this->iHtmlParser->parse(); |
157 | 157 | $this->iCurrentWord = $this->TOKEN_UL; |
158 | 158 | $this->iListLevel++; |
159 | 159 | return $this->iCurrentWord; |
160 | 160 | } |
161 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "li") == 0) { |
|
161 | + else if (strcasecmp($this->iHtmlParser->iNodeName, "li") == 0) { |
|
162 | 162 | $this->iHtmlParser->parse(); |
163 | 163 | $this->iCurrentWord = $this->TOKEN_LI; |
164 | 164 | return $this->iCurrentWord; |
165 | 165 | } |
166 | 166 | } |
167 | 167 | else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) { |
168 | - if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
168 | + if (strcasecmp($this->iHtmlParser->iNodeName, "script") == 0) { |
|
169 | 169 | $this->iHtmlParser->parse(); |
170 | 170 | $this->iCurrentWord = ""; |
171 | 171 | $this->iInScript = false; |
172 | 172 | return $this->iCurrentWord; |
173 | 173 | } |
174 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
174 | + else if (strcasecmp($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
175 | 175 | $this->iHtmlParser->parse(); |
176 | 176 | $this->iCurrentWord = $this->TOKEN_ENDUL; |
177 | 177 | if ($this->iListLevel > 0) { |
@@ -187,20 +187,20 @@ discard block |
||
187 | 187 | return false; |
188 | 188 | } |
189 | 189 | |
190 | - function splitWords ($text) { |
|
191 | - $words = split ("[ \t\r\n]+", $text); |
|
190 | + function splitWords($text) { |
|
191 | + $words = split("[ \t\r\n]+", $text); |
|
192 | 192 | for ($idx = 0; $idx < count($words); $idx++) { |
193 | 193 | $words[$idx] = $this->htmlDecode($words[$idx]); |
194 | 194 | } |
195 | 195 | return $words; |
196 | 196 | } |
197 | 197 | |
198 | - function htmlDecode ($text) { |
|
198 | + function htmlDecode($text) { |
|
199 | 199 | // TBD |
200 | 200 | return $text; |
201 | 201 | } |
202 | 202 | |
203 | - function getIndentation ($hasLI) { |
|
203 | + function getIndentation($hasLI) { |
|
204 | 204 | $indent = ""; |
205 | 205 | $idx = 0; |
206 | 206 | for ($idx = 0; $idx < ($this->iListLevel - 1); $idx++) { |
@@ -106,8 +106,7 @@ discard block |
||
106 | 106 | } |
107 | 107 | if ($prevLine != "") { |
108 | 108 | $prevLine .= " "; |
109 | - } |
|
110 | - else { |
|
109 | + } else { |
|
111 | 110 | $prevLine = $this->getIndentation($word === $this->TOKEN_AFTERLI); |
112 | 111 | } |
113 | 112 | $candidateLine = $prevLine . $toAdd; |
@@ -130,48 +129,40 @@ discard block |
||
130 | 129 | if ($this->iCurrentWordIndex < count($this->iCurrentWordArray)) { |
131 | 130 | $this->iCurrentWord = $this->iCurrentWordArray[$this->iCurrentWordIndex++]; |
132 | 131 | return $this->iCurrentWord; |
133 | - } |
|
134 | - else { |
|
132 | + } else { |
|
135 | 133 | $this->iInText = false; |
136 | 134 | } |
137 | - } |
|
138 | - else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) { |
|
135 | + } else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) { |
|
139 | 136 | if (strcasecmp ($this->iHtmlParser->iNodeName, "br") == 0) { |
140 | 137 | $this->iHtmlParser->parse(); |
141 | 138 | $this->iCurrentWord = $this->TOKEN_BR; |
142 | 139 | return $this->iCurrentWord; |
143 | - } |
|
144 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "p") == 0) { |
|
140 | + } else if (strcasecmp ($this->iHtmlParser->iNodeName, "p") == 0) { |
|
145 | 141 | $this->iHtmlParser->parse(); |
146 | 142 | $this->iCurrentWord = $this->TOKEN_P; |
147 | 143 | return $this->iCurrentWord; |
148 | - } |
|
149 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
144 | + } else if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
|
150 | 145 | $this->iHtmlParser->parse(); |
151 | 146 | $this->iCurrentWord = ""; |
152 | 147 | $this->iInScript = true; |
153 | 148 | return $this->iCurrentWord; |
154 | - } |
|
155 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
149 | + } else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
156 | 150 | $this->iHtmlParser->parse(); |
157 | 151 | $this->iCurrentWord = $this->TOKEN_UL; |
158 | 152 | $this->iListLevel++; |
159 | 153 | return $this->iCurrentWord; |
160 | - } |
|
161 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "li") == 0) { |
|
154 | + } else if (strcasecmp ($this->iHtmlParser->iNodeName, "li") == 0) { |
|
162 | 155 | $this->iHtmlParser->parse(); |
163 | 156 | $this->iCurrentWord = $this->TOKEN_LI; |
164 | 157 | return $this->iCurrentWord; |
165 | 158 | } |
166 | - } |
|
167 | - else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) { |
|
159 | + } else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) { |
|
168 | 160 | if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) { |
169 | 161 | $this->iHtmlParser->parse(); |
170 | 162 | $this->iCurrentWord = ""; |
171 | 163 | $this->iInScript = false; |
172 | 164 | return $this->iCurrentWord; |
173 | - } |
|
174 | - else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
165 | + } else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0 || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) { |
|
175 | 166 | $this->iHtmlParser->parse(); |
176 | 167 | $this->iCurrentWord = $this->TOKEN_ENDUL; |
177 | 168 | if ($this->iListLevel > 0) { |
@@ -30,336 +30,336 @@ |
||
30 | 30 | */ |
31 | 31 | class HtmlParser { |
32 | 32 | |
33 | - /** |
|
34 | - * Field iNodeType. |
|
35 | - * May be one of the NODE_TYPE_* constants above. |
|
36 | - */ |
|
37 | - var $iNodeType; |
|
33 | + /** |
|
34 | + * Field iNodeType. |
|
35 | + * May be one of the NODE_TYPE_* constants above. |
|
36 | + */ |
|
37 | + var $iNodeType; |
|
38 | 38 | |
39 | - /** |
|
40 | - * Field iNodeName. |
|
41 | - * For elements, it's the name of the element. |
|
42 | - */ |
|
43 | - var $iNodeName = ""; |
|
39 | + /** |
|
40 | + * Field iNodeName. |
|
41 | + * For elements, it's the name of the element. |
|
42 | + */ |
|
43 | + var $iNodeName = ""; |
|
44 | 44 | |
45 | - /** |
|
46 | - * Field iNodeValue. |
|
47 | - * For text nodes, it's the text. |
|
48 | - */ |
|
49 | - var $iNodeValue = ""; |
|
45 | + /** |
|
46 | + * Field iNodeValue. |
|
47 | + * For text nodes, it's the text. |
|
48 | + */ |
|
49 | + var $iNodeValue = ""; |
|
50 | 50 | |
51 | - /** |
|
52 | - * Field iNodeAttributes. |
|
53 | - * A string-indexed array containing attribute values |
|
54 | - * of the current node. Indexes are always lowercase. |
|
55 | - */ |
|
56 | - var $iNodeAttributes; |
|
51 | + /** |
|
52 | + * Field iNodeAttributes. |
|
53 | + * A string-indexed array containing attribute values |
|
54 | + * of the current node. Indexes are always lowercase. |
|
55 | + */ |
|
56 | + var $iNodeAttributes; |
|
57 | 57 | |
58 | - // The following fields should be |
|
59 | - // considered private: |
|
58 | + // The following fields should be |
|
59 | + // considered private: |
|
60 | 60 | |
61 | - var $iHtmlText; |
|
62 | - var $iHtmlTextLength; |
|
63 | - var $iHtmlTextIndex = 0; |
|
64 | - var $iHtmlCurrentChar; |
|
65 | - var $BOE_ARRAY; |
|
66 | - var $B_ARRAY; |
|
67 | - var $BOS_ARRAY; |
|
61 | + var $iHtmlText; |
|
62 | + var $iHtmlTextLength; |
|
63 | + var $iHtmlTextIndex = 0; |
|
64 | + var $iHtmlCurrentChar; |
|
65 | + var $BOE_ARRAY; |
|
66 | + var $B_ARRAY; |
|
67 | + var $BOS_ARRAY; |
|
68 | 68 | |
69 | - /** |
|
70 | - * Constructor. |
|
71 | - * Constructs an HtmlParser instance with |
|
72 | - * the HTML text given. |
|
73 | - */ |
|
74 | - function HtmlParser ($aHtmlText) { |
|
75 | - $this->iHtmlText = $aHtmlText; |
|
76 | - $this->iHtmlTextLength = strlen($aHtmlText); |
|
77 | - $this->iNodeAttributes = array(); |
|
78 | - $this->setTextIndex (0); |
|
69 | + /** |
|
70 | + * Constructor. |
|
71 | + * Constructs an HtmlParser instance with |
|
72 | + * the HTML text given. |
|
73 | + */ |
|
74 | + function HtmlParser ($aHtmlText) { |
|
75 | + $this->iHtmlText = $aHtmlText; |
|
76 | + $this->iHtmlTextLength = strlen($aHtmlText); |
|
77 | + $this->iNodeAttributes = array(); |
|
78 | + $this->setTextIndex (0); |
|
79 | 79 | |
80 | - $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" ); |
|
81 | - $this->B_ARRAY = array (" ", "\t", "\r", "\n" ); |
|
82 | - $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" ); |
|
83 | - } |
|
80 | + $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" ); |
|
81 | + $this->B_ARRAY = array (" ", "\t", "\r", "\n" ); |
|
82 | + $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" ); |
|
83 | + } |
|
84 | 84 | |
85 | - /** |
|
86 | - * Method parse. |
|
87 | - * Parses the next node. Returns false only if |
|
88 | - * the end of the HTML text has been reached. |
|
89 | - * Updates values of iNode* fields. |
|
90 | - */ |
|
91 | - function parse() { |
|
92 | - $text = $this->skipToElement(); |
|
93 | - if ($text != "") { |
|
94 | - $this->iNodeType = NODE_TYPE_TEXT; |
|
95 | - $this->iNodeName = "Text"; |
|
96 | - $this->iNodeValue = $text; |
|
97 | - return true; |
|
98 | - } |
|
99 | - return $this->readTag(); |
|
100 | - } |
|
85 | + /** |
|
86 | + * Method parse. |
|
87 | + * Parses the next node. Returns false only if |
|
88 | + * the end of the HTML text has been reached. |
|
89 | + * Updates values of iNode* fields. |
|
90 | + */ |
|
91 | + function parse() { |
|
92 | + $text = $this->skipToElement(); |
|
93 | + if ($text != "") { |
|
94 | + $this->iNodeType = NODE_TYPE_TEXT; |
|
95 | + $this->iNodeName = "Text"; |
|
96 | + $this->iNodeValue = $text; |
|
97 | + return true; |
|
98 | + } |
|
99 | + return $this->readTag(); |
|
100 | + } |
|
101 | 101 | |
102 | - function clearAttributes() { |
|
103 | - $this->iNodeAttributes = array(); |
|
104 | - } |
|
102 | + function clearAttributes() { |
|
103 | + $this->iNodeAttributes = array(); |
|
104 | + } |
|
105 | 105 | |
106 | - function readTag() { |
|
107 | - if ($this->iCurrentChar != "<") { |
|
108 | - $this->iNodeType = NODE_TYPE_DONE; |
|
109 | - return false; |
|
110 | - } |
|
111 | - $this->clearAttributes(); |
|
112 | - $this->skipMaxInTag ("<", 1); |
|
113 | - if ($this->iCurrentChar == '/') { |
|
114 | - $this->moveNext(); |
|
115 | - $name = $this->skipToBlanksInTag(); |
|
116 | - $this->iNodeType = NODE_TYPE_ENDELEMENT; |
|
117 | - $this->iNodeName = $name; |
|
118 | - $this->iNodeValue = ""; |
|
119 | - $this->skipEndOfTag(); |
|
120 | - return true; |
|
121 | - } |
|
122 | - $name = $this->skipToBlanksOrSlashInTag(); |
|
123 | - if (!$this->isValidTagIdentifier ($name)) { |
|
124 | - $comment = false; |
|
125 | - if (strpos($name, "!--") === 0) { |
|
126 | - $ppos = strpos($name, "--", 3); |
|
127 | - if (strpos($name, "--", 3) === (strlen($name) - 2)) { |
|
128 | - $this->iNodeType = NODE_TYPE_COMMENT; |
|
129 | - $this->iNodeName = "Comment"; |
|
130 | - $this->iNodeValue = "<" . $name . ">"; |
|
131 | - $comment = true; |
|
132 | - } |
|
133 | - else { |
|
134 | - $rest = $this->skipToStringInTag ("-->"); |
|
135 | - if ($rest != "") { |
|
136 | - $this->iNodeType = NODE_TYPE_COMMENT; |
|
137 | - $this->iNodeName = "Comment"; |
|
138 | - $this->iNodeValue = "<" . $name . $rest; |
|
139 | - $comment = true; |
|
140 | - // Already skipped end of tag |
|
141 | - return true; |
|
142 | - } |
|
143 | - } |
|
144 | - } |
|
145 | - if (!$comment) { |
|
146 | - $this->iNodeType = NODE_TYPE_TEXT; |
|
147 | - $this->iNodeName = "Text"; |
|
148 | - $this->iNodeValue = "<" . $name; |
|
149 | - return true; |
|
150 | - } |
|
151 | - } |
|
152 | - else { |
|
153 | - $this->iNodeType = NODE_TYPE_ELEMENT; |
|
154 | - $this->iNodeValue = ""; |
|
155 | - $this->iNodeName = $name; |
|
156 | - while ($this->skipBlanksInTag()) { |
|
157 | - $attrName = $this->skipToBlanksOrEqualsInTag(); |
|
158 | - if ($attrName != "" && $attrName != "/") { |
|
159 | - $this->skipBlanksInTag(); |
|
160 | - if ($this->iCurrentChar == "=") { |
|
161 | - $this->skipEqualsInTag(); |
|
162 | - $this->skipBlanksInTag(); |
|
163 | - $value = $this->readValueInTag(); |
|
164 | - $this->iNodeAttributes[strtolower($attrName)] = $value; |
|
165 | - } |
|
166 | - else { |
|
167 | - $this->iNodeAttributes[strtolower($attrName)] = ""; |
|
168 | - } |
|
169 | - } |
|
170 | - } |
|
171 | - } |
|
172 | - $this->skipEndOfTag(); |
|
173 | - return true; |
|
174 | - } |
|
106 | + function readTag() { |
|
107 | + if ($this->iCurrentChar != "<") { |
|
108 | + $this->iNodeType = NODE_TYPE_DONE; |
|
109 | + return false; |
|
110 | + } |
|
111 | + $this->clearAttributes(); |
|
112 | + $this->skipMaxInTag ("<", 1); |
|
113 | + if ($this->iCurrentChar == '/') { |
|
114 | + $this->moveNext(); |
|
115 | + $name = $this->skipToBlanksInTag(); |
|
116 | + $this->iNodeType = NODE_TYPE_ENDELEMENT; |
|
117 | + $this->iNodeName = $name; |
|
118 | + $this->iNodeValue = ""; |
|
119 | + $this->skipEndOfTag(); |
|
120 | + return true; |
|
121 | + } |
|
122 | + $name = $this->skipToBlanksOrSlashInTag(); |
|
123 | + if (!$this->isValidTagIdentifier ($name)) { |
|
124 | + $comment = false; |
|
125 | + if (strpos($name, "!--") === 0) { |
|
126 | + $ppos = strpos($name, "--", 3); |
|
127 | + if (strpos($name, "--", 3) === (strlen($name) - 2)) { |
|
128 | + $this->iNodeType = NODE_TYPE_COMMENT; |
|
129 | + $this->iNodeName = "Comment"; |
|
130 | + $this->iNodeValue = "<" . $name . ">"; |
|
131 | + $comment = true; |
|
132 | + } |
|
133 | + else { |
|
134 | + $rest = $this->skipToStringInTag ("-->"); |
|
135 | + if ($rest != "") { |
|
136 | + $this->iNodeType = NODE_TYPE_COMMENT; |
|
137 | + $this->iNodeName = "Comment"; |
|
138 | + $this->iNodeValue = "<" . $name . $rest; |
|
139 | + $comment = true; |
|
140 | + // Already skipped end of tag |
|
141 | + return true; |
|
142 | + } |
|
143 | + } |
|
144 | + } |
|
145 | + if (!$comment) { |
|
146 | + $this->iNodeType = NODE_TYPE_TEXT; |
|
147 | + $this->iNodeName = "Text"; |
|
148 | + $this->iNodeValue = "<" . $name; |
|
149 | + return true; |
|
150 | + } |
|
151 | + } |
|
152 | + else { |
|
153 | + $this->iNodeType = NODE_TYPE_ELEMENT; |
|
154 | + $this->iNodeValue = ""; |
|
155 | + $this->iNodeName = $name; |
|
156 | + while ($this->skipBlanksInTag()) { |
|
157 | + $attrName = $this->skipToBlanksOrEqualsInTag(); |
|
158 | + if ($attrName != "" && $attrName != "/") { |
|
159 | + $this->skipBlanksInTag(); |
|
160 | + if ($this->iCurrentChar == "=") { |
|
161 | + $this->skipEqualsInTag(); |
|
162 | + $this->skipBlanksInTag(); |
|
163 | + $value = $this->readValueInTag(); |
|
164 | + $this->iNodeAttributes[strtolower($attrName)] = $value; |
|
165 | + } |
|
166 | + else { |
|
167 | + $this->iNodeAttributes[strtolower($attrName)] = ""; |
|
168 | + } |
|
169 | + } |
|
170 | + } |
|
171 | + } |
|
172 | + $this->skipEndOfTag(); |
|
173 | + return true; |
|
174 | + } |
|
175 | 175 | |
176 | - function isValidTagIdentifier ($name) { |
|
177 | - return ereg ("^[A-Za-z0-9_\\-]+$", $name); |
|
178 | - } |
|
176 | + function isValidTagIdentifier ($name) { |
|
177 | + return ereg ("^[A-Za-z0-9_\\-]+$", $name); |
|
178 | + } |
|
179 | 179 | |
180 | - function skipBlanksInTag() { |
|
181 | - return "" != ($this->skipInTag ($this->B_ARRAY)); |
|
182 | - } |
|
180 | + function skipBlanksInTag() { |
|
181 | + return "" != ($this->skipInTag ($this->B_ARRAY)); |
|
182 | + } |
|
183 | 183 | |
184 | - function skipToBlanksOrEqualsInTag() { |
|
185 | - return $this->skipToInTag ($this->BOE_ARRAY); |
|
186 | - } |
|
184 | + function skipToBlanksOrEqualsInTag() { |
|
185 | + return $this->skipToInTag ($this->BOE_ARRAY); |
|
186 | + } |
|
187 | 187 | |
188 | - function skipToBlanksInTag() { |
|
189 | - return $this->skipToInTag ($this->B_ARRAY); |
|
190 | - } |
|
188 | + function skipToBlanksInTag() { |
|
189 | + return $this->skipToInTag ($this->B_ARRAY); |
|
190 | + } |
|
191 | 191 | |
192 | - function skipToBlanksOrSlashInTag() { |
|
193 | - return $this->skipToInTag ($this->BOS_ARRAY); |
|
194 | - } |
|
192 | + function skipToBlanksOrSlashInTag() { |
|
193 | + return $this->skipToInTag ($this->BOS_ARRAY); |
|
194 | + } |
|
195 | 195 | |
196 | - function skipEqualsInTag() { |
|
197 | - return $this->skipMaxInTag ("=", 1); |
|
198 | - } |
|
196 | + function skipEqualsInTag() { |
|
197 | + return $this->skipMaxInTag ("=", 1); |
|
198 | + } |
|
199 | 199 | |
200 | - function readValueInTag() { |
|
201 | - $ch = $this->iCurrentChar; |
|
202 | - $value = ""; |
|
203 | - if ($ch == "\"") { |
|
204 | - $this->skipMaxInTag ("\"", 1); |
|
205 | - $value = $this->skipToInTag ("\""); |
|
206 | - $this->skipMaxInTag ("\"", 1); |
|
207 | - } |
|
208 | - else if ($ch == "'") { |
|
209 | - $this->skipMaxInTag ("'", 1); |
|
210 | - $value = $this->skipToInTag ("'"); |
|
211 | - $this->skipMaxInTag ("'", 1); |
|
212 | - } |
|
213 | - else { |
|
214 | - $value = $this->skipToBlanksInTag(); |
|
215 | - } |
|
216 | - return $value; |
|
217 | - } |
|
200 | + function readValueInTag() { |
|
201 | + $ch = $this->iCurrentChar; |
|
202 | + $value = ""; |
|
203 | + if ($ch == "\"") { |
|
204 | + $this->skipMaxInTag ("\"", 1); |
|
205 | + $value = $this->skipToInTag ("\""); |
|
206 | + $this->skipMaxInTag ("\"", 1); |
|
207 | + } |
|
208 | + else if ($ch == "'") { |
|
209 | + $this->skipMaxInTag ("'", 1); |
|
210 | + $value = $this->skipToInTag ("'"); |
|
211 | + $this->skipMaxInTag ("'", 1); |
|
212 | + } |
|
213 | + else { |
|
214 | + $value = $this->skipToBlanksInTag(); |
|
215 | + } |
|
216 | + return $value; |
|
217 | + } |
|
218 | 218 | |
219 | - function setTextIndex ($index) { |
|
220 | - $this->iHtmlTextIndex = $index; |
|
221 | - if ($index >= $this->iHtmlTextLength) { |
|
222 | - $this->iCurrentChar = -1; |
|
223 | - } |
|
224 | - else { |
|
225 | - $this->iCurrentChar = $this->iHtmlText{$index}; |
|
226 | - } |
|
227 | - } |
|
219 | + function setTextIndex ($index) { |
|
220 | + $this->iHtmlTextIndex = $index; |
|
221 | + if ($index >= $this->iHtmlTextLength) { |
|
222 | + $this->iCurrentChar = -1; |
|
223 | + } |
|
224 | + else { |
|
225 | + $this->iCurrentChar = $this->iHtmlText{$index}; |
|
226 | + } |
|
227 | + } |
|
228 | 228 | |
229 | - function moveNext() { |
|
230 | - if ($this->iHtmlTextIndex < $this->iHtmlTextLength) { |
|
231 | - $this->setTextIndex ($this->iHtmlTextIndex + 1); |
|
232 | - return true; |
|
233 | - } |
|
234 | - else { |
|
235 | - return false; |
|
236 | - } |
|
237 | - } |
|
229 | + function moveNext() { |
|
230 | + if ($this->iHtmlTextIndex < $this->iHtmlTextLength) { |
|
231 | + $this->setTextIndex ($this->iHtmlTextIndex + 1); |
|
232 | + return true; |
|
233 | + } |
|
234 | + else { |
|
235 | + return false; |
|
236 | + } |
|
237 | + } |
|
238 | 238 | |
239 | - function skipEndOfTag() { |
|
240 | - while (($ch = $this->iCurrentChar) !== -1) { |
|
241 | - if ($ch == ">") { |
|
242 | - $this->moveNext(); |
|
243 | - return; |
|
244 | - } |
|
245 | - $this->moveNext(); |
|
246 | - } |
|
247 | - } |
|
239 | + function skipEndOfTag() { |
|
240 | + while (($ch = $this->iCurrentChar) !== -1) { |
|
241 | + if ($ch == ">") { |
|
242 | + $this->moveNext(); |
|
243 | + return; |
|
244 | + } |
|
245 | + $this->moveNext(); |
|
246 | + } |
|
247 | + } |
|
248 | 248 | |
249 | - function skipInTag ($chars) { |
|
250 | - $sb = ""; |
|
251 | - while (($ch = $this->iCurrentChar) !== -1) { |
|
252 | - if ($ch == ">") { |
|
253 | - return $sb; |
|
254 | - } else { |
|
255 | - $match = false; |
|
256 | - for ($idx = 0; $idx < count($chars); $idx++) { |
|
257 | - if ($ch == $chars[$idx]) { |
|
258 | - $match = true; |
|
259 | - break; |
|
260 | - } |
|
261 | - } |
|
262 | - if (!$match) { |
|
263 | - return $sb; |
|
264 | - } |
|
265 | - $sb .= $ch; |
|
266 | - $this->moveNext(); |
|
267 | - } |
|
268 | - } |
|
269 | - return $sb; |
|
270 | - } |
|
249 | + function skipInTag ($chars) { |
|
250 | + $sb = ""; |
|
251 | + while (($ch = $this->iCurrentChar) !== -1) { |
|
252 | + if ($ch == ">") { |
|
253 | + return $sb; |
|
254 | + } else { |
|
255 | + $match = false; |
|
256 | + for ($idx = 0; $idx < count($chars); $idx++) { |
|
257 | + if ($ch == $chars[$idx]) { |
|
258 | + $match = true; |
|
259 | + break; |
|
260 | + } |
|
261 | + } |
|
262 | + if (!$match) { |
|
263 | + return $sb; |
|
264 | + } |
|
265 | + $sb .= $ch; |
|
266 | + $this->moveNext(); |
|
267 | + } |
|
268 | + } |
|
269 | + return $sb; |
|
270 | + } |
|
271 | 271 | |
272 | - function skipMaxInTag ($chars, $maxChars) { |
|
273 | - $sb = ""; |
|
274 | - $count = 0; |
|
275 | - while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) { |
|
276 | - if ($ch == ">") { |
|
277 | - return $sb; |
|
278 | - } else { |
|
279 | - $match = false; |
|
280 | - for ($idx = 0; $idx < count($chars); $idx++) { |
|
281 | - if ($ch == $chars[$idx]) { |
|
282 | - $match = true; |
|
283 | - break; |
|
284 | - } |
|
285 | - } |
|
286 | - if (!$match) { |
|
287 | - return $sb; |
|
288 | - } |
|
289 | - $sb .= $ch; |
|
290 | - $this->moveNext(); |
|
291 | - } |
|
292 | - } |
|
293 | - return $sb; |
|
294 | - } |
|
272 | + function skipMaxInTag ($chars, $maxChars) { |
|
273 | + $sb = ""; |
|
274 | + $count = 0; |
|
275 | + while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) { |
|
276 | + if ($ch == ">") { |
|
277 | + return $sb; |
|
278 | + } else { |
|
279 | + $match = false; |
|
280 | + for ($idx = 0; $idx < count($chars); $idx++) { |
|
281 | + if ($ch == $chars[$idx]) { |
|
282 | + $match = true; |
|
283 | + break; |
|
284 | + } |
|
285 | + } |
|
286 | + if (!$match) { |
|
287 | + return $sb; |
|
288 | + } |
|
289 | + $sb .= $ch; |
|
290 | + $this->moveNext(); |
|
291 | + } |
|
292 | + } |
|
293 | + return $sb; |
|
294 | + } |
|
295 | 295 | |
296 | - function skipToInTag ($chars) { |
|
297 | - $sb = ""; |
|
298 | - while (($ch = $this->iCurrentChar) !== -1) { |
|
299 | - $match = $ch == ">"; |
|
300 | - if (!$match) { |
|
301 | - for ($idx = 0; $idx < count($chars); $idx++) { |
|
302 | - if ($ch == $chars[$idx]) { |
|
303 | - $match = true; |
|
304 | - break; |
|
305 | - } |
|
306 | - } |
|
307 | - } |
|
308 | - if ($match) { |
|
309 | - return $sb; |
|
310 | - } |
|
311 | - $sb .= $ch; |
|
312 | - $this->moveNext(); |
|
313 | - } |
|
314 | - return $sb; |
|
315 | - } |
|
296 | + function skipToInTag ($chars) { |
|
297 | + $sb = ""; |
|
298 | + while (($ch = $this->iCurrentChar) !== -1) { |
|
299 | + $match = $ch == ">"; |
|
300 | + if (!$match) { |
|
301 | + for ($idx = 0; $idx < count($chars); $idx++) { |
|
302 | + if ($ch == $chars[$idx]) { |
|
303 | + $match = true; |
|
304 | + break; |
|
305 | + } |
|
306 | + } |
|
307 | + } |
|
308 | + if ($match) { |
|
309 | + return $sb; |
|
310 | + } |
|
311 | + $sb .= $ch; |
|
312 | + $this->moveNext(); |
|
313 | + } |
|
314 | + return $sb; |
|
315 | + } |
|
316 | 316 | |
317 | - function skipToElement() { |
|
318 | - $sb = ""; |
|
319 | - while (($ch = $this->iCurrentChar) !== -1) { |
|
320 | - if ($ch == "<") { |
|
321 | - return $sb; |
|
322 | - } |
|
323 | - $sb .= $ch; |
|
324 | - $this->moveNext(); |
|
325 | - } |
|
326 | - return $sb; |
|
327 | - } |
|
317 | + function skipToElement() { |
|
318 | + $sb = ""; |
|
319 | + while (($ch = $this->iCurrentChar) !== -1) { |
|
320 | + if ($ch == "<") { |
|
321 | + return $sb; |
|
322 | + } |
|
323 | + $sb .= $ch; |
|
324 | + $this->moveNext(); |
|
325 | + } |
|
326 | + return $sb; |
|
327 | + } |
|
328 | 328 | |
329 | - /** |
|
330 | - * Returns text between current position and $needle, |
|
331 | - * inclusive, or "" if not found. The current index is moved to a point |
|
332 | - * after the location of $needle, or not moved at all |
|
333 | - * if nothing is found. |
|
334 | - */ |
|
335 | - function skipToStringInTag ($needle) { |
|
336 | - $pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex); |
|
337 | - if ($pos === false) { |
|
338 | - return ""; |
|
339 | - } |
|
340 | - $top = $pos + strlen($needle); |
|
341 | - $retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex); |
|
342 | - $this->setTextIndex ($top); |
|
343 | - return $retvalue; |
|
344 | - } |
|
329 | + /** |
|
330 | + * Returns text between current position and $needle, |
|
331 | + * inclusive, or "" if not found. The current index is moved to a point |
|
332 | + * after the location of $needle, or not moved at all |
|
333 | + * if nothing is found. |
|
334 | + */ |
|
335 | + function skipToStringInTag ($needle) { |
|
336 | + $pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex); |
|
337 | + if ($pos === false) { |
|
338 | + return ""; |
|
339 | + } |
|
340 | + $top = $pos + strlen($needle); |
|
341 | + $retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex); |
|
342 | + $this->setTextIndex ($top); |
|
343 | + return $retvalue; |
|
344 | + } |
|
345 | 345 | } |
346 | 346 | |
347 | 347 | function HtmlParser_ForFile ($fileName) { |
348 | - return HtmlParser_ForURL($fileName); |
|
348 | + return HtmlParser_ForURL($fileName); |
|
349 | 349 | } |
350 | 350 | |
351 | 351 | function HtmlParser_ForURL ($url) { |
352 | - $fp = fopen ($url, "r"); |
|
353 | - $content = ""; |
|
354 | - while (true) { |
|
355 | - $data = fread ($fp, 8192); |
|
356 | - if (strlen($data) == 0) { |
|
357 | - break; |
|
358 | - } |
|
359 | - $content .= $data; |
|
360 | - } |
|
361 | - fclose ($fp); |
|
362 | - return new HtmlParser ($content); |
|
352 | + $fp = fopen ($url, "r"); |
|
353 | + $content = ""; |
|
354 | + while (true) { |
|
355 | + $data = fread ($fp, 8192); |
|
356 | + if (strlen($data) == 0) { |
|
357 | + break; |
|
358 | + } |
|
359 | + $content .= $data; |
|
360 | + } |
|
361 | + fclose ($fp); |
|
362 | + return new HtmlParser ($content); |
|
363 | 363 | } |
364 | 364 | |
365 | 365 | php?> |
@@ -10,12 +10,12 @@ discard block |
||
10 | 10 | * - Leo West (performance improvements) |
11 | 11 | */ |
12 | 12 | |
13 | -define ("NODE_TYPE_START",0); |
|
14 | -define ("NODE_TYPE_ELEMENT",1); |
|
15 | -define ("NODE_TYPE_ENDELEMENT",2); |
|
16 | -define ("NODE_TYPE_TEXT",3); |
|
17 | -define ("NODE_TYPE_COMMENT",4); |
|
18 | -define ("NODE_TYPE_DONE",5); |
|
13 | +define("NODE_TYPE_START", 0); |
|
14 | +define("NODE_TYPE_ELEMENT", 1); |
|
15 | +define("NODE_TYPE_ENDELEMENT", 2); |
|
16 | +define("NODE_TYPE_TEXT", 3); |
|
17 | +define("NODE_TYPE_COMMENT", 4); |
|
18 | +define("NODE_TYPE_DONE", 5); |
|
19 | 19 | |
20 | 20 | /** |
21 | 21 | * Class HtmlParser. |
@@ -71,15 +71,15 @@ discard block |
||
71 | 71 | * Constructs an HtmlParser instance with |
72 | 72 | * the HTML text given. |
73 | 73 | */ |
74 | - function HtmlParser ($aHtmlText) { |
|
74 | + function HtmlParser($aHtmlText) { |
|
75 | 75 | $this->iHtmlText = $aHtmlText; |
76 | 76 | $this->iHtmlTextLength = strlen($aHtmlText); |
77 | 77 | $this->iNodeAttributes = array(); |
78 | - $this->setTextIndex (0); |
|
78 | + $this->setTextIndex(0); |
|
79 | 79 | |
80 | - $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" ); |
|
81 | - $this->B_ARRAY = array (" ", "\t", "\r", "\n" ); |
|
82 | - $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" ); |
|
80 | + $this->BOE_ARRAY = array(" ", "\t", "\r", "\n", "="); |
|
81 | + $this->B_ARRAY = array(" ", "\t", "\r", "\n"); |
|
82 | + $this->BOS_ARRAY = array(" ", "\t", "\r", "\n", "/"); |
|
83 | 83 | } |
84 | 84 | |
85 | 85 | /** |
@@ -109,7 +109,7 @@ discard block |
||
109 | 109 | return false; |
110 | 110 | } |
111 | 111 | $this->clearAttributes(); |
112 | - $this->skipMaxInTag ("<", 1); |
|
112 | + $this->skipMaxInTag("<", 1); |
|
113 | 113 | if ($this->iCurrentChar == '/') { |
114 | 114 | $this->moveNext(); |
115 | 115 | $name = $this->skipToBlanksInTag(); |
@@ -120,7 +120,7 @@ discard block |
||
120 | 120 | return true; |
121 | 121 | } |
122 | 122 | $name = $this->skipToBlanksOrSlashInTag(); |
123 | - if (!$this->isValidTagIdentifier ($name)) { |
|
123 | + if (!$this->isValidTagIdentifier($name)) { |
|
124 | 124 | $comment = false; |
125 | 125 | if (strpos($name, "!--") === 0) { |
126 | 126 | $ppos = strpos($name, "--", 3); |
@@ -131,7 +131,7 @@ discard block |
||
131 | 131 | $comment = true; |
132 | 132 | } |
133 | 133 | else { |
134 | - $rest = $this->skipToStringInTag ("-->"); |
|
134 | + $rest = $this->skipToStringInTag("-->"); |
|
135 | 135 | if ($rest != "") { |
136 | 136 | $this->iNodeType = NODE_TYPE_COMMENT; |
137 | 137 | $this->iNodeName = "Comment"; |
@@ -173,42 +173,42 @@ discard block |
||
173 | 173 | return true; |
174 | 174 | } |
175 | 175 | |
176 | - function isValidTagIdentifier ($name) { |
|
177 | - return ereg ("^[A-Za-z0-9_\\-]+$", $name); |
|
176 | + function isValidTagIdentifier($name) { |
|
177 | + return ereg("^[A-Za-z0-9_\\-]+$", $name); |
|
178 | 178 | } |
179 | 179 | |
180 | 180 | function skipBlanksInTag() { |
181 | - return "" != ($this->skipInTag ($this->B_ARRAY)); |
|
181 | + return "" != ($this->skipInTag($this->B_ARRAY)); |
|
182 | 182 | } |
183 | 183 | |
184 | 184 | function skipToBlanksOrEqualsInTag() { |
185 | - return $this->skipToInTag ($this->BOE_ARRAY); |
|
185 | + return $this->skipToInTag($this->BOE_ARRAY); |
|
186 | 186 | } |
187 | 187 | |
188 | 188 | function skipToBlanksInTag() { |
189 | - return $this->skipToInTag ($this->B_ARRAY); |
|
189 | + return $this->skipToInTag($this->B_ARRAY); |
|
190 | 190 | } |
191 | 191 | |
192 | 192 | function skipToBlanksOrSlashInTag() { |
193 | - return $this->skipToInTag ($this->BOS_ARRAY); |
|
193 | + return $this->skipToInTag($this->BOS_ARRAY); |
|
194 | 194 | } |
195 | 195 | |
196 | 196 | function skipEqualsInTag() { |
197 | - return $this->skipMaxInTag ("=", 1); |
|
197 | + return $this->skipMaxInTag("=", 1); |
|
198 | 198 | } |
199 | 199 | |
200 | 200 | function readValueInTag() { |
201 | 201 | $ch = $this->iCurrentChar; |
202 | 202 | $value = ""; |
203 | 203 | if ($ch == "\"") { |
204 | - $this->skipMaxInTag ("\"", 1); |
|
205 | - $value = $this->skipToInTag ("\""); |
|
206 | - $this->skipMaxInTag ("\"", 1); |
|
204 | + $this->skipMaxInTag("\"", 1); |
|
205 | + $value = $this->skipToInTag("\""); |
|
206 | + $this->skipMaxInTag("\"", 1); |
|
207 | 207 | } |
208 | 208 | else if ($ch == "'") { |
209 | - $this->skipMaxInTag ("'", 1); |
|
210 | - $value = $this->skipToInTag ("'"); |
|
211 | - $this->skipMaxInTag ("'", 1); |
|
209 | + $this->skipMaxInTag("'", 1); |
|
210 | + $value = $this->skipToInTag("'"); |
|
211 | + $this->skipMaxInTag("'", 1); |
|
212 | 212 | } |
213 | 213 | else { |
214 | 214 | $value = $this->skipToBlanksInTag(); |
@@ -216,7 +216,7 @@ discard block |
||
216 | 216 | return $value; |
217 | 217 | } |
218 | 218 | |
219 | - function setTextIndex ($index) { |
|
219 | + function setTextIndex($index) { |
|
220 | 220 | $this->iHtmlTextIndex = $index; |
221 | 221 | if ($index >= $this->iHtmlTextLength) { |
222 | 222 | $this->iCurrentChar = -1; |
@@ -228,7 +228,7 @@ discard block |
||
228 | 228 | |
229 | 229 | function moveNext() { |
230 | 230 | if ($this->iHtmlTextIndex < $this->iHtmlTextLength) { |
231 | - $this->setTextIndex ($this->iHtmlTextIndex + 1); |
|
231 | + $this->setTextIndex($this->iHtmlTextIndex + 1); |
|
232 | 232 | return true; |
233 | 233 | } |
234 | 234 | else { |
@@ -246,7 +246,7 @@ discard block |
||
246 | 246 | } |
247 | 247 | } |
248 | 248 | |
249 | - function skipInTag ($chars) { |
|
249 | + function skipInTag($chars) { |
|
250 | 250 | $sb = ""; |
251 | 251 | while (($ch = $this->iCurrentChar) !== -1) { |
252 | 252 | if ($ch == ">") { |
@@ -269,7 +269,7 @@ discard block |
||
269 | 269 | return $sb; |
270 | 270 | } |
271 | 271 | |
272 | - function skipMaxInTag ($chars, $maxChars) { |
|
272 | + function skipMaxInTag($chars, $maxChars) { |
|
273 | 273 | $sb = ""; |
274 | 274 | $count = 0; |
275 | 275 | while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) { |
@@ -293,7 +293,7 @@ discard block |
||
293 | 293 | return $sb; |
294 | 294 | } |
295 | 295 | |
296 | - function skipToInTag ($chars) { |
|
296 | + function skipToInTag($chars) { |
|
297 | 297 | $sb = ""; |
298 | 298 | while (($ch = $this->iCurrentChar) !== -1) { |
299 | 299 | $match = $ch == ">"; |
@@ -332,34 +332,34 @@ discard block |
||
332 | 332 | * after the location of $needle, or not moved at all |
333 | 333 | * if nothing is found. |
334 | 334 | */ |
335 | - function skipToStringInTag ($needle) { |
|
336 | - $pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex); |
|
335 | + function skipToStringInTag($needle) { |
|
336 | + $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex); |
|
337 | 337 | if ($pos === false) { |
338 | 338 | return ""; |
339 | 339 | } |
340 | 340 | $top = $pos + strlen($needle); |
341 | - $retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex); |
|
342 | - $this->setTextIndex ($top); |
|
341 | + $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex); |
|
342 | + $this->setTextIndex($top); |
|
343 | 343 | return $retvalue; |
344 | 344 | } |
345 | 345 | } |
346 | 346 | |
347 | -function HtmlParser_ForFile ($fileName) { |
|
347 | +function HtmlParser_ForFile($fileName) { |
|
348 | 348 | return HtmlParser_ForURL($fileName); |
349 | 349 | } |
350 | 350 | |
351 | -function HtmlParser_ForURL ($url) { |
|
352 | - $fp = fopen ($url, "r"); |
|
351 | +function HtmlParser_ForURL($url) { |
|
352 | + $fp = fopen($url, "r"); |
|
353 | 353 | $content = ""; |
354 | 354 | while (true) { |
355 | - $data = fread ($fp, 8192); |
|
355 | + $data = fread($fp, 8192); |
|
356 | 356 | if (strlen($data) == 0) { |
357 | 357 | break; |
358 | 358 | } |
359 | 359 | $content .= $data; |
360 | 360 | } |
361 | - fclose ($fp); |
|
362 | - return new HtmlParser ($content); |
|
361 | + fclose($fp); |
|
362 | + return new HtmlParser($content); |
|
363 | 363 | } |
364 | 364 | |
365 | 365 | php?> |
@@ -129,8 +129,7 @@ discard block |
||
129 | 129 | $this->iNodeName = "Comment"; |
130 | 130 | $this->iNodeValue = "<" . $name . ">"; |
131 | 131 | $comment = true; |
132 | - } |
|
133 | - else { |
|
132 | + } else { |
|
134 | 133 | $rest = $this->skipToStringInTag ("-->"); |
135 | 134 | if ($rest != "") { |
136 | 135 | $this->iNodeType = NODE_TYPE_COMMENT; |
@@ -148,8 +147,7 @@ discard block |
||
148 | 147 | $this->iNodeValue = "<" . $name; |
149 | 148 | return true; |
150 | 149 | } |
151 | - } |
|
152 | - else { |
|
150 | + } else { |
|
153 | 151 | $this->iNodeType = NODE_TYPE_ELEMENT; |
154 | 152 | $this->iNodeValue = ""; |
155 | 153 | $this->iNodeName = $name; |
@@ -162,8 +160,7 @@ discard block |
||
162 | 160 | $this->skipBlanksInTag(); |
163 | 161 | $value = $this->readValueInTag(); |
164 | 162 | $this->iNodeAttributes[strtolower($attrName)] = $value; |
165 | - } |
|
166 | - else { |
|
163 | + } else { |
|
167 | 164 | $this->iNodeAttributes[strtolower($attrName)] = ""; |
168 | 165 | } |
169 | 166 | } |
@@ -204,13 +201,11 @@ discard block |
||
204 | 201 | $this->skipMaxInTag ("\"", 1); |
205 | 202 | $value = $this->skipToInTag ("\""); |
206 | 203 | $this->skipMaxInTag ("\"", 1); |
207 | - } |
|
208 | - else if ($ch == "'") { |
|
204 | + } else if ($ch == "'") { |
|
209 | 205 | $this->skipMaxInTag ("'", 1); |
210 | 206 | $value = $this->skipToInTag ("'"); |
211 | 207 | $this->skipMaxInTag ("'", 1); |
212 | - } |
|
213 | - else { |
|
208 | + } else { |
|
214 | 209 | $value = $this->skipToBlanksInTag(); |
215 | 210 | } |
216 | 211 | return $value; |
@@ -220,8 +215,7 @@ discard block |
||
220 | 215 | $this->iHtmlTextIndex = $index; |
221 | 216 | if ($index >= $this->iHtmlTextLength) { |
222 | 217 | $this->iCurrentChar = -1; |
223 | - } |
|
224 | - else { |
|
218 | + } else { |
|
225 | 219 | $this->iCurrentChar = $this->iHtmlText{$index}; |
226 | 220 | } |
227 | 221 | } |
@@ -230,8 +224,7 @@ discard block |
||
230 | 224 | if ($this->iHtmlTextIndex < $this->iHtmlTextLength) { |
231 | 225 | $this->setTextIndex ($this->iHtmlTextIndex + 1); |
232 | 226 | return true; |
233 | - } |
|
234 | - else { |
|
227 | + } else { |
|
235 | 228 | return false; |
236 | 229 | } |
237 | 230 | } |