1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace PHPHtmlParser; |
||
6 | |||
7 | use PHPHtmlParser\Enum\StringToken; |
||
8 | use PHPHtmlParser\Exceptions\ContentLengthException; |
||
9 | use PHPHtmlParser\Exceptions\LogicalException; |
||
10 | |||
/**
 * Class Content.
 *
 * Wraps a string together with a cursor (a zero-based byte offset) and offers
 * helpers to peek at, copy from and move through that string.
 */
class Content
{
    /**
     * The content string.
     *
     * @var string
     */
    protected $content;

    /**
     * The size of the content, in bytes (computed once with strlen()).
     *
     * @var int
     */
    protected $size;

    /**
     * The current position we are in the content (zero-based byte offset).
     *
     * @var int
     */
    protected $pos;

    /**
     * The following 4 strings are character sets that are important to us.
     *
     * NOTE(review): none of them is read inside this class; presumably they
     * are kept for subclasses or older callers - confirm before removing.
     *
     * @var string
     */
    protected $blank = " \t\r\n";
    protected $equal = ' =/>';
    protected $slash = " />\r\n\t";
    protected $attr = ' >';

    /**
     * Content constructor.
     *
     * Stores the string, caches its byte length and places the cursor at
     * the start.
     */
    public function __construct(string $content = '')
    {
        $this->content = $content;
        $this->size = \strlen($content);
        $this->pos = 0;
    }

    /**
     * Returns the current position of the content.
     */
    public function getPosition(): int
    {
        return $this->pos;
    }

    /**
     * Gets the character at the given offset, or at the current position
     * when no offset is given.
     *
     * @param ?int $char Offset to read; null means the current position.
     *                   PHP resolves a negative offset from the end of the string.
     *
     * @return string A single character, or '' when the offset does not exist
     */
    public function char(?int $char = null): string
    {
        return $this->content[$char ?? $this->pos] ?? '';
    }

    /**
     * Gets a string from the current character position without moving
     * the position.
     *
     * @param int $length Number of bytes wanted; zero or less yields ''
     *
     * @return string Up to $length bytes; shorter when the content ends first
     */
    public function string(int $length = 1): string
    {
        if ($length <= 0) {
            // nothing was asked for; a negative length must not reach
            // substr(), where it means "stop that many bytes from the end"
            return '';
        }

        // the cast covers PHP 7, where substr() returns false for a start
        // offset beyond the end of the string
        return (string) \substr($this->content, $this->pos, $length);
    }

    /**
     * Moves the current position forward.
     *
     * @throws ContentLengthException when the move would leave the content
     */
    public function fastForward(int $count): Content
    {
        if (!$this->canFastForward($count)) {
            // trying to go outside of the content, throw exception
            throw new ContentLengthException('Attempt to fastForward pass the length of the content.');
        }
        $this->pos += $count;

        return $this;
    }

    /**
     * Checks if we can move the position forward.
     *
     * The move is allowed when the resulting position stays between 0 and
     * the content size (inclusive), so a negative count can never push the
     * position below the start of the content.
     */
    public function canFastForward(int $count): bool
    {
        $target = $this->pos + $count;

        return $target >= 0 && $target <= $this->size;
    }

    /**
     * Moves the current position backward, stopping at the start of the
     * content.
     */
    public function rewind(int $count): Content
    {
        $this->pos = \max(0, $this->pos - $count);

        return $this;
    }

    /**
     * Copy the content until we find the given string.
     *
     * On a match the position is moved onto the match (not past it) and the
     * text in between is returned. Without a match the remaining content is
     * returned and the position is moved to the end.
     *
     * @param string $string The delimiter to look for
     * @param bool   $char   When true, $string is a set of characters and the
     *                       copy stops at the first occurrence of any of them
     * @param bool   $escape When true, $string is searched as a whole and an
     *                       occurrence directly preceded by a backslash is
     *                       skipped; takes precedence over $char
     *
     * @throws LogicalException when substr() unexpectedly returns false
     */
    public function copyUntil(string $string, bool $char = false, bool $escape = false): string
    {
        if ($this->pos >= $this->size) {
            // nothing left
            return '';
        }

        if ($escape) {
            $position = $this->findUnescaped($string);
        } elseif ($char) {
            // strcspn() returns a length relative to the current position
            $position = $this->pos + \strcspn($this->content, $string, $this->pos);
        } else {
            $position = \strpos($this->content, $string, $this->pos);
        }

        if ($position === false) {
            // could not find character, just return the remaining of the content
            $return = \substr($this->content, $this->pos, $this->size - $this->pos);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
            $this->pos = $this->size;

            return $return;
        }

        if ($position === $this->pos) {
            // we are at the right place
            return '';
        }

        $return = \substr($this->content, $this->pos, $position - $this->pos);
        if ($return === false) {
            throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
        }
        // set the new position
        $this->pos = $position;

        return $return;
    }

    /**
     * Copies the content until the string is found and return it
     * unless the 'unless' is found in the substring.
     *
     * The position is first moved one character forward, then the content is
     * copied up to the next unescaped $string. When the copied text contains
     * none of the characters of $unless, $string followed by the copied text
     * is returned. Otherwise the position is restored and '' is returned.
     *
     * @throws ContentLengthException when the position is already at the end
     * @throws LogicalException
     */
    public function copyUntilUnless(string $string, string $unless): string
    {
        $lastPos = $this->pos;
        $this->fastForward(1);
        $foundString = $this->copyUntil($string, true, true);

        // strcspn() equals the full length only if no $unless character occurs
        if (\strcspn($foundString, $unless) === \strlen($foundString)) {
            return $string . $foundString;
        }

        // rewind changes and return nothing
        $this->pos = $lastPos;

        return '';
    }

    /**
     * Copies the content until it reaches the token string.
     *
     * The search string is the value returned by the token's getValue().
     *
     * @uses $this->copyUntil()
     *
     * @throws LogicalException
     */
    public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string
    {
        return $this->copyUntil($stringToken->getValue(), $char, $escape);
    }

    /**
     * Skip a given set of characters.
     *
     * Moves the position past the run of characters, starting at the current
     * position, that all belong to $string.
     *
     * @param string $string The set of characters to skip
     * @param bool   $copy   When true the skipped characters are returned
     *
     * @return string The skipped characters when $copy is true, '' otherwise
     *
     * @throws LogicalException
     */
    public function skip(string $string, bool $copy = false): string
    {
        $len = \strspn($this->content, $string, $this->pos);
        if ($len === false) {
            throw new LogicalException('Strspn returned false with position ' . $this->pos . '.');
        }

        $return = '';
        if ($copy) {
            $return = \substr($this->content, $this->pos, $len);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
        }

        // update the position
        $this->pos += $len;

        return $return;
    }

    /**
     * Skip a given token of pre-defined characters.
     *
     * The character set is the value returned by the token's getValue().
     *
     * @uses $this->skip()
     *
     * @throws LogicalException
     */
    public function skipByToken(StringToken $skipToken, bool $copy = false): string
    {
        return $this->skip($skipToken->getValue(), $copy);
    }

    /**
     * Finds the first occurrence of $needle, at or after the current
     * position, that is not directly preceded by a backslash.
     *
     * @return int|false Offset of the occurrence, or false when there is none
     */
    private function findUnescaped(string $needle)
    {
        $offset = $this->pos;
        while (($offset = \strpos($this->content, $needle, $offset)) !== false) {
            // an occurrence at offset 0 has no previous byte; reading offset -1
            // would wrap around to the last byte of the content
            if ($offset === 0 || $this->content[$offset - 1] !== '\\') {
                return $offset;
            }

            // this occurrence is escaped, keep searching right after it
            ++$offset;
        }

        return false;
    }
}
||
258 |