paquettg /
php-html-parser
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace PHPHtmlParser; |
||
| 6 | |||
| 7 | use PHPHtmlParser\Enum\StringToken; |
||
| 8 | use PHPHtmlParser\Exceptions\ContentLengthException; |
||
| 9 | use PHPHtmlParser\Exceptions\LogicalException; |
||
| 10 | |||
/**
 * Class Content.
 *
 * Wraps a string together with its byte length and a movable byte position,
 * and offers helpers to peek at, copy from and skip over the string starting
 * at that position.
 */
class Content
{
    /**
     * The content string.
     *
     * @var string
     */
    protected $content;

    /**
     * The size of the content, in bytes (computed once with strlen()).
     *
     * @var int
     */
    protected $size;

    /**
     * The current position we are in the content (0-based byte offset).
     *
     * @var int
     */
    protected $pos;

    /**
     * The following 4 strings are tags that are important to us.
     *
     * None of them is read by a method of this class; they are kept as
     * protected members so existing subclasses relying on them keep working.
     *
     * @var string
     */
    protected $blank = " \t\r\n";
    protected $equal = ' =/>';
    protected $slash = " />\r\n\t";
    protected $attr = ' >';

    /**
     * Content constructor.
     *
     * Stores the string, caches its length and places the position at 0.
     */
    public function __construct(string $content = '')
    {
        $this->content = $content;
        $this->size = \strlen($content);
        $this->pos = 0;
    }

    /**
     * Returns the current position of the content.
     */
    public function getPosition(): int
    {
        return $this->pos;
    }

    /**
     * Gets the character at the given offset, or at the current position
     * when no offset is given.
     *
     * @param ?int $char offset to read; null means "current position"
     *
     * @return string a single character, or '' when the offset does not exist
     */
    public function char(?int $char = null): string
    {
        return $this->content[$char ?? $this->pos] ?? '';
    }

    /**
     * Gets a string from the current character position without moving it.
     *
     * Because the loop is a do/while, one character is always read, even
     * when $length is smaller than 1. Offsets past the end contribute ''.
     *
     * @param int $length number of characters to read
     *
     * @return string
     */
    public function string(int $length = 1): string
    {
        $string = '';
        $position = $this->pos;
        do {
            $string .= $this->char($position++);
        } while ($position < $this->pos + $length);

        return $string;
    }

    /**
     * Moves the current position forward.
     *
     * @param int $count number of characters to advance
     *
     * @return Content this instance, for chaining
     *
     * @throws ContentLengthException when the move would pass the end of the content
     */
    public function fastForward(int $count): Content
    {
        if (!$this->canFastForward($count)) {
            // trying to go over the content length, throw exception
            throw new ContentLengthException('Attempt to fastForward pass the length of the content.');
        }
        $this->pos += $count;

        return $this;
    }

    /**
     * Checks if we can move the position forward.
     *
     * Landing exactly on the end of the content (position === size) is allowed.
     */
    public function canFastForward(int $count): bool
    {
        // use the cached size, like every other method of this class
        return $this->size >= $this->pos + $count;
    }

    /**
     * Moves the current position backward, clamping at 0.
     *
     * @param int $count number of characters to go back
     *
     * @return Content this instance, for chaining
     */
    public function rewind(int $count): Content
    {
        $this->pos -= $count;
        if ($this->pos < 0) {
            $this->pos = 0;
        }

        return $this;
    }

    /**
     * Copy the content until we find the given string.
     *
     * The search always starts at the current position and runs in one of
     * three modes:
     *  - $escape: $string is searched as a substring; a match directly
     *    preceded by a backslash is skipped. This mode wins over $char.
     *  - $char: $string is treated as a set of characters; the copy stops at
     *    the first character belonging to the set.
     *  - otherwise: $string is searched as a plain substring.
     *
     * When nothing is found, the remainder of the content is returned and the
     * position moves to the end. Otherwise the text before the match is
     * returned and the position is left on the match itself.
     *
     * @param string $string substring or character set to stop at
     * @param bool   $char   treat $string as a character set
     * @param bool   $escape ignore matches preceded by a backslash
     *
     * @return string the copied text ('' when already at the end or on a match)
     *
     * @throws LogicalException when substr() reports a failure
     */
    public function copyUntil(string $string, bool $char = false, bool $escape = false): string
    {
        if ($this->pos >= $this->size) {
            // nothing left
            return '';
        }

        if ($escape) {
            $position = $this->pos;
            $found = false;
            while (!$found) {
                $position = \strpos($this->content, $string, $position);
                if ($position === false) {
                    // reached the end
                    break;
                }

                // Only look behind when a previous character exists: a
                // negative string offset is resolved from the END of the
                // string, so a match at offset 0 would otherwise be compared
                // against the last character of the content.
                if ($position > 0 && $this->char($position - 1) === '\\') {
                    // this character is escaped
                    ++$position;
                    continue;
                }

                $found = true;
            }
        } elseif ($char) {
            // strcspn() returns a length relative to the start offset
            $position = \strcspn($this->content, $string, $this->pos);
            $position += $this->pos;
        } else {
            $position = \strpos($this->content, $string, $this->pos);
        }

        if ($position === false) {
            // could not find character, just return the remaining of the content
            $return = \substr($this->content, $this->pos, $this->size - $this->pos);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
            $this->pos = $this->size;

            return $return;
        }

        if ($position === $this->pos) {
            // we are at the right place
            return '';
        }

        $return = \substr($this->content, $this->pos, $position - $this->pos);
        if ($return === false) {
            throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
        }
        // set the new position
        $this->pos = $position;

        return $return;
    }

    /**
     * Copies the content until the string is found and return it
     * unless the 'unless' is found in the substring.
     *
     * The position is first advanced by one, then the text up to the next
     * unescaped occurrence of $string is copied. If that text contains none
     * of the characters of $unless, $string followed by the copied text is
     * returned. Otherwise the position is restored and '' is returned.
     *
     * @param string $string substring to stop at
     * @param string $unless set of characters that invalidate the copy
     *
     * @throws ContentLengthException when the position cannot be advanced by one
     * @throws LogicalException       when substr() reports a failure
     */
    public function copyUntilUnless(string $string, string $unless): string
    {
        $lastPos = $this->pos;
        $this->fastForward(1);
        $foundString = $this->copyUntil($string, true, true);

        // strcspn() equals the full length only when no $unless character occurs
        $position = \strcspn($foundString, $unless);
        if ($position === \strlen($foundString)) {
            return $string . $foundString;
        }
        // rewind changes and return nothing
        $this->pos = $lastPos;

        return '';
    }

    /**
     * Copies the content until it reaches the token string.
     *
     * @param StringToken $stringToken token whose value is passed to copyUntil()
     * @param bool        $char        forwarded to copyUntil()
     * @param bool        $escape      forwarded to copyUntil()
     *
     * @throws LogicalException
     *
     * @uses $this->copyUntil()
     */
    public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string
    {
        $string = $stringToken->getValue();

        return $this->copyUntil($string, $char, $escape);
    }

    /**
     * Skip a given set of characters.
     *
     * Advances the position over the leading run of characters that all
     * belong to $string.
     *
     * @param string $string set of characters to skip
     * @param bool   $copy   when true, return the skipped characters
     *
     * @return string the skipped characters when $copy is true, '' otherwise
     *
     * @throws LogicalException when strspn() or substr() reports a failure
     */
    public function skip(string $string, bool $copy = false): string
    {
        $len = \strspn($this->content, $string, $this->pos);
        if ($len === false) {
            throw new LogicalException('Strspn returned false with position ' . $this->pos . '.');
        }
        $return = '';
        if ($copy) {
            $return = \substr($this->content, $this->pos, $len);
            if ($return === false) {
                throw new LogicalException('Substr returned false with position ' . $this->pos . '.');
            }
        }

        // update the position
        $this->pos += $len;

        return $return;
    }

    /**
     * Skip a given token of pre-defined characters.
     *
     * @param StringToken $skipToken token whose value is passed to skip()
     * @param bool        $copy      forwarded to skip()
     *
     * @throws LogicalException
     *
     * @uses $this->skip()
     */
    public function skipByToken(StringToken $skipToken, bool $copy = false): string
    {
        $string = $skipToken->getValue();

        return $this->skip($string, $copy);
    }
}
||
| 258 |