| Total Complexity | 149 |
| Total Lines | 747 |
| Duplicated Lines | 0 % |
| Changes | 2 | ||
| Bugs | 2 | Features | 0 |
Complex classes like PDFObject often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PDFObject, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 41 | class PDFObject |
||
| 42 | { |
||
/**
 * Key of a parsed command entry holding its type marker
 * (the leading delimiter such as '/', '[', '<', '(' or 'n' for a bare number).
 */
const TYPE = 't';

/**
 * Key of a parsed command entry holding the operator (e.g. 'Tf', 'Tj').
 */
const OPERATOR = 'o';

/**
 * Key of a parsed command entry holding the operand(s) of the operator.
 */
const COMMAND = 'c';

/**
 * The recursion stack: unique ids of the objects whose getText() call is
 * currently running, consulted before descending into an XObject.
 *
 * @var array
 */
public static $recursionStack = array();

/**
 * Document passed to the constructor.
 *
 * @var Document
 */
protected $document = null;

/**
 * Header passed to the constructor, or a new Header when none was given.
 *
 * @var Header
 */
protected $header = null;

/**
 * Raw content passed to the constructor.
 *
 * @var string
 */
protected $content = null;
||
| 70 | |||
/**
 * Stores the document, header and content on the new object.
 *
 * @param Document    $document document kept in $this->document
 * @param Header|null $header   header to keep; when null a `new Header()` is used instead
 * @param string|null $content  content kept as-is in $this->content
 */
public function __construct(Document $document, Header $header = null, $content = null)
{
    if (is_null($header)) {
        $header = new Header();
    }

    $this->content = $content;
    $this->header = $header;
    $this->document = $document;
}
||
| 82 | |||
/**
 * Initialisation hook. The implementation in this class is empty.
 */
public function init()
{
    // Intentionally left blank.
}
||
| 90 | |||
/**
 * Gives access to the header stored on this object.
 *
 * @return null|Header
 */
public function getHeader()
{
    $header = $this->header;

    return $header;
}
||
| 98 | |||
/**
 * Looks up a header entry by name; the call is forwarded to the header.
 *
 * @param string $name name of the entry to fetch
 *
 * @return Element|PDFObject
 */
public function get($name)
{
    $header = $this->header;

    return $header->get($name);
}
||
| 108 | |||
/**
 * Tells whether the header has an entry with the given name; the call is
 * forwarded to the header.
 *
 * @param string $name name of the entry to test
 *
 * @return bool
 */
public function has($name)
{
    $header = $this->header;

    return $header->has($name);
}
||
| 118 | |||
/**
 * Returns the details reported by the header; the call is forwarded to
 * Header::getDetails() with the same flag.
 *
 * @param bool $deep passed through unchanged to the header
 *
 * @return array
 */
public function getDetails($deep = true)
{
    $header = $this->header;

    return $header->getDetails($deep);
}
||
| 128 | |||
| 129 | /** |
||
| 130 | * @return null|string |
||
| 131 | */ |
||
| 132 | public function getContent() |
||
| 135 | } |
||
| 136 | |||
/**
 * Masks the parts of a content stream that must not be scanned for operators.
 *
 * Every masked byte is replaced by exactly one mask character, so the result
 * has the same length as the input and offsets found in the masked text can
 * be applied to the original text. Masked, in this order: escaped backslashes
 * and parentheses, inline image blocks (BI ... ID ... EI), the inside of
 * [...] arrays, the inside of (...) strings, everything nested in <...>,
 * "/Name ... BDC" markers and " EMC " markers.
 *
 * @param string $content text to mask
 * @param string $char    mask character; only its first character is used,
 *                        and 'X' is used when an empty string is given
 *
 * @return string masked text, same length as $content
 */
public function cleanContent($content, $char = 'X')
{
    // A single-character mask is what keeps masked and original offsets aligned.
    $char = (string) $char;
    $char = ($char === '') ? 'X' : $char[0];

    // Two-byte escape sequences become two mask characters.
    $content = str_replace(array('\\\\', '\\)', '\\('), $char . $char, $content);

    // Overwrites every capture of the given group with mask characters.
    $mask = function ($subject, $pattern, $group) use ($char) {
        $matches = array();
        if (preg_match_all($pattern, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[$group] as $part) {
                $size = strlen($part[0]);
                $subject = substr_replace($subject, str_repeat($char, $size), $part[1], $size);
            }
        }

        return $subject;
    };

    // Remove image bloc with binary content
    $content = $mask($content, '/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0);

    // Clean content in square brackets [.....]
    $content = $mask($content, '/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1);

    // Clean content in round brackets (.....)
    $content = $mask($content, '/\((.*?)\)/s', 1);

    // Clean structure: anything at nesting level > 0 of <...> is masked,
    // including the angle brackets themselves.
    if ($parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE)) {
        $content = '';
        $level = 0;
        foreach ($parts as $part) {
            if ($part === '<') {
                $level++;
            }

            $content .= ($level == 0 ? $part : str_repeat($char, strlen($part)));

            if ($part === '>') {
                $level--;
            }
        }
    }

    // Clean BDC and EMC markup. The mask character is quoted for the '/'
    // delimiter too, otherwise a '/' mask would terminate the pattern early.
    $content = $mask($content, '/(\/[A-Za-z0-9\_]*\s*' . preg_quote($char, '/') . '*BDC)/s', 1);
    $content = $mask($content, '/\s(EMC)\s/s', 1);

    return $content;
}
||
| 198 | |||
/**
 * Splits a content stream into the pieces that getText() interprets.
 *
 * The stream is padded with one space on each side and masked with
 * cleanContent(); text blocks (BT ... ET) and "/Name Do" commands are located
 * in the masked copy and then cut out of the padded original at the same
 * offsets. All text blocks come first, followed by all Do commands.
 *
 * @param string $content content stream to split
 *
 * @return array list of section strings
 */
public function getSectionsText($content)
{
    $padded = ' ' . $content . ' ';
    $masked = $this->cleanContent($padded, '_');
    $found = array();
    $result = array();

    // Extract text blocks.
    if (preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $masked, $found, PREG_OFFSET_CAPTURE)) {
        foreach ($found[1] as $capture) {
            list($maskedText, $start) = $capture;
            if ($maskedText === '') {
                continue;
            }

            $raw = substr($padded, $start, strlen($maskedText));

            // Removes BDC and EMC markup, keeping only what lies between them.
            $result[] = preg_replace(
                '/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s',
                '${3}',
                $raw . ' '
            );
        }
    }

    // Extract 'do' commands.
    if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $found, PREG_OFFSET_CAPTURE)) {
        foreach ($found[1] as $capture) {
            list($maskedText, $start) = $capture;
            $result[] = substr($padded, $start, strlen($maskedText));
        }
    }

    return $result;
}
||
| 240 | |||
/**
 * Extracts the text of this object's content stream.
 *
 * The content is split with getSectionsText(), every section is parsed with
 * getCommandsText() and the resulting operators are interpreted: positioning
 * operators add line breaks, spaces or tabs, 'Tf' switches the current font
 * (only when a page is given), text-showing operators are decoded through
 * the current font and 'Do' recurses into the referenced XObject of the page.
 * Operators not listed in the switch are ignored.
 *
 * The object's unique id is kept on the static recursion stack for the
 * duration of the call so that circular XObject references are skipped; the
 * stack is restored even when a nested call throws.
 *
 * @param Page|null $page page used to resolve fonts and XObjects, if any
 *
 * @return string extracted text followed by a single space
 * @throws \Exception
 */
public function getText(Page $page = null)
{
    $text = '';
    $sections = $this->getSectionsText($this->content);

    // Start with the first Font object of the document, or a new Font when
    // the document has none.
    $current_font = null;
    foreach ($this->document->getObjects() as $obj) {
        if ($obj instanceof Font) {
            $current_font = $obj;
            break;
        }
    }
    if ($current_font === null) {
        $current_font = new Font($this->document);
    }

    $current_position_td = array('x' => false, 'y' => false);
    $current_position_tm = array('x' => false, 'y' => false);

    self::$recursionStack[] = $this->getUniqueId();

    try {
        foreach ($sections as $section) {
            foreach ($this->getCommandsText($section) as $command) {
                switch ($command[self::OPERATOR]) {
                    // move text current point
                    case 'Td':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        $movedDown = $current_position_td['y'] !== false
                            && floatval($y) < floatval($current_position_td['y']);
                        $movedRight = $current_position_td['x'] !== false
                            && floatval($x) > floatval($current_position_td['x']);
                        if (floatval($x) <= 0 || $movedDown) {
                            // vertical offset
                            $text .= "\n";
                        } elseif ($movedRight) {
                            // horizontal offset
                            $text .= ' ';
                        }
                        $current_position_td = array('x' => $x, 'y' => $y);
                        break;

                    // move text current point and set leading
                    case 'TD':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if (floatval($y) < 0) {
                            $text .= "\n";
                        } elseif (floatval($x) <= 0) {
                            $text .= ' ';
                        }
                        break;

                    // select font: the first operand is the font id
                    case 'Tf':
                        list($id,) = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        if (!is_null($page)) {
                            $current_font = $page->getFont($id);
                        }
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it has the same shape as a TJ array,
                        // then fall through.
                        $command[self::COMMAND] = array($command);
                        // no break
                    case 'TJ':
                        // Skip if not previously defined, should never happen.
                        if (is_null($current_font)) {
                            // Fallback
                            // TODO : Improve
                            $text .= $command[self::COMMAND][0][self::COMMAND];
                            break;
                        }

                        $text .= $current_font->decodeText($command[self::COMMAND]);
                        break;

                    // set leading
                    case 'TL':
                        $text .= ' ';
                        break;

                    // set text matrix: the last two operands are taken as x and y
                    case 'Tm':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if ($current_position_tm['x'] !== false
                            && abs(floatval($x) - floatval($current_position_tm['x'])) > 10
                        ) {
                            $text .= "\t";
                        }
                        if ($current_position_tm['y'] !== false
                            && abs(floatval($y) - floatval($current_position_tm['y'])) > 10
                        ) {
                            $text .= "\n";
                        }
                        $current_position_tm = array('x' => $x, 'y' => $y);
                        break;

                    // set horizontal scaling
                    case 'Tz':
                    // move to start of next line
                    case 'T*':
                        $text .= "\n";
                        break;

                    // paint an XObject: recurse into it when it is a PDFObject
                    case 'Do':
                        if (!is_null($page)) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // @todo $xobject could be a ElementXRef object, which would then throw an error
                            if ($xobject instanceof PDFObject
                                && !in_array($xobject->getUniqueId(), self::$recursionStack, true)
                            ) {
                                // Not a circular reference.
                                $text .= $xobject->getText($page);
                            }
                        }
                        break;

                    default:
                        // Every other operator has no effect on the extracted text.
                }
            }
        }
    } catch (\Exception $e) {
        // Leave the shared stack as we found it before propagating.
        array_pop(self::$recursionStack);
        throw $e;
    } catch (\Throwable $e) {
        // PHP 7+ engine errors; this clause never matches on PHP 5.
        array_pop(self::$recursionStack);
        throw $e;
    }

    array_pop(self::$recursionStack);

    return $text . ' ';
}
||
| 439 | |||
/**
 * Extracts the text of this object's content stream as a list of fragments.
 *
 * Works like getText() but ignores all positioning operators: one entry is
 * appended per text-showing command ("'", 'Tj', 'TJ') and one per 'Do'
 * command whose XObject is a PDFObject (its getText() result). 'Tf' switches
 * the current font. Fonts and XObjects are only resolved when a page is
 * given; without a page the initial `new Font($this->document)` is used
 * throughout.
 *
 * @param Page|null $page page used to resolve fonts and XObjects, if any
 *
 * @return array list of text fragments
 * @throws \Exception
 */
public function getTextArray(Page $page = null)
{
    $text = array();
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document);

    foreach ($sections as $section) {
        foreach ($this->getCommandsText($section) as $command) {
            switch ($command[self::OPERATOR]) {
                // select font: the first operand is the font id
                case 'Tf':
                    list($id,) = preg_split('/\s/s', $command[self::COMMAND]);
                    $id = trim($id, '/');
                    // $page is optional; without it there is nothing to look the font up in.
                    if (!is_null($page)) {
                        $current_font = $page->getFont($id);
                    }
                    break;

                case "'":
                case 'Tj':
                    // Wrap the single string so it has the same shape as a TJ array,
                    // then fall through.
                    $command[self::COMMAND] = array($command);
                    // no break
                case 'TJ':
                    // Skip if not previously defined, should never happen.
                    if (is_null($current_font)) {
                        // Fallback
                        // TODO : Improve
                        $text[] = $command[self::COMMAND][0][self::COMMAND];
                        break;
                    }

                    $text[] = $current_font->decodeText($command[self::COMMAND]);
                    break;

                // paint an XObject
                case 'Do':
                    if (!is_null($page)) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        $xobject = $page->getXObject($id);

                        // Same guard as getText(): only PDFObject instances are
                        // known to provide getText().
                        if ($xobject instanceof PDFObject) {
                            $text[] = $xobject->getText($page);
                        }
                    }
                    break;

                default:
                    // Every other operator contributes no text fragment.
            }
        }
    }

    return $text;
}
||
| 572 | |||
| 573 | |||
/**
 * Parses a section of a content stream into a list of commands.
 *
 * Each entry is an array with the keys self::TYPE (the leading delimiter
 * '/', '[', '<', '(' - 'n' for a bare number - or ''), self::OPERATOR (the
 * operator that follows the operand, '' when none was found) and
 * self::COMMAND (the operand text, or a nested list of commands for '[').
 *
 * Parsing stops at the end of the text, at 'ET', at a closing ']', '>' or
 * ')' that is met at command position, or at anything that cannot be
 * recognised. The closing ']' is how the recursive call made for '[' hands
 * control back to its caller.
 *
 * Unterminated '<...' and '(...' operands take the rest of the text.
 *
 * @param string $text_part text to parse
 * @param int    $offset    position to start from; updated in place to the
 *                          position where parsing stopped
 *
 * @return array list of commands
 */
public function getCommandsText($text_part, &$offset = 0)
{
    $commands = $matches = array();
    $length = strlen($text_part);

    while ($offset < $length) {
        // Skip PDF white-space characters.
        $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
        if ($offset >= $length) {
            // Only trailing white-space was left: nothing more to read.
            break;
        }
        $char = $text_part[$offset];

        $operator = '';
        $type = '';
        $command = false;

        switch ($char) {
            case '/':
                // name, optionally followed by a number, then an operator
                $type = $char;
                if (preg_match(
                    '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += strlen($matches[0]);
                } elseif (preg_match(
                    '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += strlen($matches[0]);
                }
                break;

            case '[':
                // array object: its elements are parsed recursively
                $type = $char;
                ++$offset;
                $command = $this->getCommandsText($text_part, $offset);

                if (preg_match('/^\s*[A-Z]{1,2}\s*/si', (string) substr($text_part, $offset), $matches)) {
                    $operator = trim($matches[0]);
                    $offset += strlen($matches[0]);
                }
                break;

            case ']':
                // end of array: consume it and stop ($command stays false)
                $type = $char;
                ++$offset;
                break;

            case '<':
            case '>':
                $type = $char;
                ++$offset;
                if ($char == '<') {
                    $strpos = strpos($text_part, '>', $offset);
                    if ($strpos === false) {
                        // No closing '>': take the remainder instead of letting
                        // `false` reset the offset and re-read the same bytes.
                        $command = (string) substr($text_part, $offset);
                        $offset = $length;
                    } else {
                        $command = substr($text_part, $offset, ($strpos - $offset));
                        $offset = $strpos + 1;
                    }
                }

                if (preg_match('/^\s*[A-Z]{1,2}\s*/si', (string) substr($text_part, $offset), $matches)) {
                    $operator = trim($matches[0]);
                    $offset += strlen($matches[0]);
                }
                break;

            case '(':
            case ')':
                ++$offset;
                $type = $char;
                $strpos = $offset;
                if ($char == '(') {
                    // Find the matching ')' honouring nesting and backslash escapes.
                    $open_bracket = 1;
                    while ($open_bracket > 0) {
                        if (!isset($text_part[$strpos])) {
                            break;
                        }
                        $ch = $text_part[$strpos];
                        if ($ch === '\\') {
                            // skip next character
                            ++$strpos;
                        } elseif ($ch === '(') {
                            ++$open_bracket;
                        } elseif ($ch === ')') {
                            --$open_bracket;
                        }
                        ++$strpos;
                    }

                    if ($open_bracket > 0) {
                        // Ran off the end: there is no closing ')' to drop.
                        $command = (string) substr($text_part, $offset);
                        $offset = $length;
                    } else {
                        // $strpos is one past the closing ')'.
                        $command = substr($text_part, $offset, ($strpos - $offset - 1));
                        $offset = $strpos;
                    }

                    if (preg_match(
                        '/^\s*([A-Z\']{1,2})\s*/si',
                        (string) substr($text_part, $offset),
                        $matches
                    )
                    ) {
                        $operator = $matches[1];
                        $offset += strlen($matches[0]);
                    }
                }
                break;

            default:
                if (substr($text_part, $offset, 2) == 'ET') {
                    // end of text block
                    break;
                } elseif (preg_match(
                    '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // numbers followed by an operator
                    $operator = trim($matches['id']);
                    $command = trim($matches['data']);
                    $offset += strlen($matches[0]);
                } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                    // bare numbers
                    $type = 'n';
                    $command = trim($matches[0]);
                    $offset += strlen($matches[0]);
                } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                    // operator without operand
                    $type = '';
                    $operator = $matches[1];
                    $command = '';
                    $offset += strlen($matches[0]);
                }
        }

        if ($command === false) {
            // Nothing recognised at this position: stop parsing.
            break;
        }

        $commands[] = array(
            self::TYPE => $type,
            self::OPERATOR => $operator,
            self::COMMAND => $command,
        );
    }

    return $commands;
}
||
| 732 | |||
/**
 * Creates the object class that matches the header's 'Type' entry.
 *
 * 'XObject' is refined by 'Subtype' into Image or Form; 'Font' is refined by
 * 'Subtype' into the class \Smalot\PdfParser\Font\Font<Subtype> when such a
 * class exists, otherwise Font. 'Pages', 'Page' and 'Encoding' map to the
 * classes of the same name. Anything else yields a plain PDFObject.
 *
 * @param $document Document
 * @param $header   Header
 * @param $content  string
 *
 * @return PDFObject
 */
public static function factory(Document $document, Header $header, $content)
{
    $type = $header->get('Type')->getContent();

    switch ($type) {
        case 'Pages':
            return new Pages($document, $header, $content);

        case 'Page':
            return new Page($document, $header, $content);

        case 'Encoding':
            return new Encoding($document, $header, $content);

        case 'XObject':
            $subtype = $header->get('Subtype')->getContent();
            switch ($subtype) {
                case 'Image':
                    return new Image($document, $header, $content);

                case 'Form':
                    return new Form($document, $header, $content);
            }

            return new self($document, $header, $content);

        case 'Font':
            $subtype = $header->get('Subtype')->getContent();
            $classname = '\Smalot\PdfParser\Font\Font' . $subtype;

            return class_exists($classname)
                ? new $classname($document, $header, $content)
                : new Font($document, $header, $content);
    }

    return new self($document, $header, $content);
}
||
| 779 | |||
| 780 | /** |
||
| 781 | * Returns unique id identifying the object. |
||
| 782 | * |
||
| 783 | * @return string |
||
| 784 | */ |
||
| 785 | protected function getUniqueId() |
||
| 788 | } |
||
| 789 | } |
||
| 790 |