| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * @file | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  *          This file is part of the PdfParser library. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * @author  Sébastien MALOT <sebastien@malot.fr> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * @date    2017-01-03 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * @license LGPLv3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * @url     <https://github.com/smalot/pdfparser> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  *  PdfParser is a pdf library written in PHP, extraction oriented. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  *  This program is free software: you can redistribute it and/or modify | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  *  it under the terms of the GNU Lesser General Public License as published by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  *  the Free Software Foundation, either version 3 of the License, or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  *  (at your option) any later version. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  *  This program is distributed in the hope that it will be useful, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  *  GNU Lesser General Public License for more details. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  *  You should have received a copy of the GNU Lesser General Public License | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  *  along with this program. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |  *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  | namespace Smalot\PdfParser; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  | use Smalot\PdfParser\XObject\Form; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  | use Smalot\PdfParser\XObject\Image; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  * Class PDFObject | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  | class PDFObject | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Single-character marker constants.
                 *
                 * NOTE(review): how these markers are consumed is not visible in this
                 * part of the file; confirm against the code that reads them.
                 */
                public const TYPE = 't';

                public const OPERATOR = 'o';

                public const COMMAND = 'c';

                /**
                 * The recursion stack.
                 *
                 * Static, so it is shared by every instance of this class.
                 *
                 * @var array
                 */
                public static $recursionStack = [];

                /**
                 * Document passed to the constructor.
                 *
                 * @var Document
                 */
                protected $document = null;

                /**
                 * Header passed to the constructor, or a new Header when none was given.
                 *
                 * @var Header
                 */
                protected $header = null;

                /**
                 * Content passed to the constructor; null when none was given.
                 *
                 * @var string|null
                 */
                protected $content = null;

                /**
                 * Configuration passed to the constructor; null when none was given.
                 *
                 * @var Config|null
                 */
                protected $config;
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Stores the owning document together with the optional header,
                 * content and configuration.
                 *
                 * @param Document    $document document stored on the instance
                 * @param Header|null $header   header to use; a new Header() is created when null
                 * @param string|null $content  content, stored unchanged
                 * @param Config|null $config   configuration, stored unchanged (may stay null)
                 */
                public function __construct(
                    Document $document,
                    ?Header $header = null,
                    ?string $content = null,
                    ?Config $config = null
                ) {
                    $this->config = $config;
                    $this->content = $content;
                    // Fall back to a fresh Header so $this->header is never null.
                    $this->header = $header ?? new Header();
                    $this->document = $document;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Initialisation hook. The body is empty in this class.
                 *
                 * NOTE(review): presumably meant to be overridden by subclasses; confirm.
                 */
                public function init()
                {
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the document stored on this object by the constructor.
                 */
                public function getDocument(): Document
                {
                    $document = $this->document;

                    return $document;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the header stored on this object.
                 *
                 * The constructor always assigns a Header instance, although the
                 * declared return type allows null.
                 */
                public function getHeader(): ?Header
                {
                    $header = $this->header;

                    return $header;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the configuration given to the constructor, or null when
                 * none was supplied.
                 */
                public function getConfig(): ?Config
                {
                    $config = $this->config;

                    return $config;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Looks up an entry by name; the lookup is delegated to the header.
                 *
                 * @param string $name name passed through to Header::get()
                 *
                 * @return Element|PDFObject|Header
                 */
                public function get(string $name)
                {
                    $entry = $this->header->get($name);

                    return $entry;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the result of Header::has() for the given name.
                 *
                 * @param string $name name passed through to Header::has()
                 */
                public function has(string $name): bool
                {
                    $found = $this->header->has($name);

                    return $found;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the details array produced by this object's header.
                 *
                 * @param bool $deep forwarded unchanged to the header's getDetails()
                 *
                 * @return array details as built by the header
                 */
                public function getDetails(bool $deep = true): array
                {
                    $details = $this->header->getDetails($deep);

                    return $details;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the content stored on this object.
                 *
                 * @return string|null the stored content, or null when none is set
                 */
                public function getContent(): ?string
                {
                    $stored = $this->content;

                    return $stored;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Masks the non-structural parts of a content stream with a filler byte.
                 *
                 * Escaped backslashes/parentheses, inline image blocks (BI ... ID ... EI),
                 * the inside of [...] arrays and (...) strings, everything nested in
                 * <...> / <<...>>, BDC tags and EMC keywords are overwritten with the
                 * filler. Every replacement has the same byte length as the text it
                 * covers, so offsets found in the returned string are valid in the
                 * original one as well.
                 *
                 * @param string $content content stream to mask
                 * @param string $char    filler; only its first byte is used, and an empty
                 *                        string falls back to the default 'X'
                 *
                 * @return string masked copy of $content with the same byte length
                 */
                public function cleanContent(string $content, string $char = 'X')
                {
                    // The filler has to be exactly one byte: an empty filler would turn
                    // every mask into a deletion and shift all subsequent offsets.
                    $char = '' === $char ? 'X' : $char[0];

                    // Overwrites capture group $group of every match of $pattern with the
                    // filler. $matches is local to each call, so a failed preg_match_all()
                    // can never re-apply offsets left over from a previous pattern.
                    $mask = static function (string $text, string $pattern, int $group) use ($char): string {
                        preg_match_all($pattern, $text, $matches, \PREG_OFFSET_CAPTURE);
                        foreach ($matches[$group] ?? [] as [$found, $offset]) {
                            $length = \strlen($found);
                            if ($length > 0) {
                                $text = substr_replace($text, str_repeat($char, $length), $offset, $length);
                            }
                        }

                        return $text;
                    };

                    // Neutralise escaped backslashes and parentheses first so they cannot
                    // confuse the bracket matching below (two bytes in, two bytes out).
                    $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

                    // Remove image bloc with binary content
                    $content = $mask($content, '/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', 0);

                    // Clean content in square brackets [.....]
                    $content = $mask($content, '/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', 1);

                    // Clean content in round brackets (.....)
                    $content = $mask($content, '/\((.*?)\)/s', 1);

                    // Clean structure: mask everything between '<' and its matching '>',
                    // delimiters included, while keeping track of the nesting depth.
                    $parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
                    if ($parts) {
                        $content = '';
                        $level = 0;
                        foreach ($parts as $part) {
                            if ('<' === $part) {
                                ++$level;
                            }

                            $content .= 0 === $level ? $part : str_repeat($char, \strlen($part));

                            if ('>' === $part) {
                                --$level;
                            }
                        }
                    }

                    // Clean BDC and EMC markup. The filler is quoted together with the
                    // '/' delimiter so that using '/' as filler keeps the pattern valid.
                    $content = $mask($content, '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s', 1);
                    $content = $mask($content, '/\s(EMC)\s/s', 1);

                    return $content;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Extracts the text sections (BT ... ET) and the "Do" commands of a content stream.
                 *
                 * The patterns are run against a masked copy of the stream produced by
                 * cleanContent(); the offsets found there are then used to cut the real
                 * text out of the unmasked stream. All text sections are returned first,
                 * followed by all "Do" commands.
                 *
                 * @param string|null $content content stream; null is treated as an empty string
                 *
                 * @return string[] extracted sections in the order described above
                 */
                public function getSectionsText(?string $content): array
                {
                    $sections = [];

                    // Pad both ends so the whitespace-anchored patterns can also match
                    // at the very start and end of the stream.
                    $content = ' '.$content.' ';
                    $textCleaned = $this->cleanContent($content, '_');

                    // Extract text blocks.
                    if (preg_match_all('/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE)) {
                        foreach ($matches[2] as $pos => $part) {
                            [$text, $offset] = $part;
                            if ('' === $text) {
                                continue;
                            }

                            // Same offset and length, but taken from the unmasked stream.
                            $section = substr($content, $offset, \strlen($text));

                            // Removes BDC and EMC markup. preg_replace() yields null on a
                            // PCRE runtime error (e.g. backtrack limit); in that case keep
                            // the section as it is instead of silently dropping its text.
                            $stripped = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $section.' ');
                            if (null !== $stripped) {
                                $section = $stripped;
                            }

                            // Add Q and q flags if detected around BT/ET.
                            // @see: https://github.com/smalot/pdfparser/issues/387
                            $prefix = !empty($matches[1][$pos][0]) ? "Q\n" : '';
                            $suffix = !empty($matches[3][$pos][0]) ? "\nq" : '';

                            $sections[] = trim($prefix.$section).$suffix;
                        }
                    }

                    // Extract 'do' commands.
                    if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $textCleaned, $matches, \PREG_OFFSET_CAPTURE)) {
                        foreach ($matches[1] as [$text, $offset]) {
                            $sections[] = substr($content, $offset, \strlen($text));
                        }
                    }

                    return $sections;
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 227 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                /**
                 * Choose the font that text extraction starts with.
                 *
                 * Candidates are collected in order: the fonts returned by
                 * $page->getFonts() (only when a page is given), then the document's
                 * first font. The first candidate is returned. When there is no
                 * candidate at all, a new Font bound to this object's document and
                 * config is created instead, so callers always receive a Font.
                 *
                 * @param Page|null $page page whose fonts take precedence; null skips the page lookup
                 *
                 * @return Font the first available candidate, or a newly created fallback Font
                 */
                private function getDefaultFont(?Page $page = null): Font
                {
                    // The nullable type is spelled out explicitly: relying on "= null" to
                    // make the parameter nullable is deprecated as of PHP 8.4.
                    $fonts = [];
                    if (null !== $page) {
                        // NOTE(review): presumably an array of Font objects — confirm against Page::getFonts().
                        $fonts = $page->getFonts();
                    }

                    // The document-wide first font is appended last, so page fonts win.
                    $firstFont = $this->document->getFirstFont();
                    if (null !== $firstFont) {
                        $fonts[] = $firstFont;
                    }

                    if (\count($fonts) > 0) {
                        // reset() yields the first element whatever its key is.
                        return reset($fonts);
                    }

                    // No font found anywhere: fall back to a fresh Font.
                    return new Font($this->document, null, null, $this->config);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 246 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 247 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 248 |  |  |      * @throws \Exception | 
            
                                                                                                            
                            
            
                                    
            
            
                | 249 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 250 | 16 |  |     public function getText(?Page $page = null): string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 251 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 252 | 16 |  |         $result = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 253 | 16 |  |         $sections = $this->getSectionsText($this->content); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 254 | 16 |  |         $current_font = $this->getDefaultFont($page); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 255 | 16 |  |         $clipped_font = $current_font; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 256 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 257 | 16 |  |         $current_position_td = ['x' => false, 'y' => false]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 258 | 16 |  |         $current_position_tm = ['x' => false, 'y' => false]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 259 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 260 | 16 |  |         self::$recursionStack[] = $this->getUniqueId(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 261 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 262 | 16 |  |         foreach ($sections as $section) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 263 | 14 |  |             $commands = $this->getCommandsText($section); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 264 | 14 |  |             $reverse_text = false; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 265 | 14 |  |             $text = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 266 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 267 | 14 |  |             foreach ($commands as $command) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 268 | 14 |  |                 switch ($command[self::OPERATOR]) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 269 | 14 |  |                     case 'BMC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 270 | 1 |  |                         if ('ReversedChars' == $command[self::COMMAND]) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 271 | 1 |  |                             $reverse_text = true; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 272 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 273 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 274 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 275 |  |  |                     // set character spacing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 276 | 14 |  |                     case 'Tc': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 277 | 2 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 278 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 279 |  |  |                     // move text current point | 
            
                                                                                                            
                            
            
                                    
            
            
                | 280 | 14 |  |                     case 'Td': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 281 | 11 |  |                         $args = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 282 | 11 |  |                         $y = array_pop($args); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 283 | 11 |  |                         $x = array_pop($args); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 284 | 11 |  |                         if (((float) $x <= 0) || | 
            
                                                                                                            
                            
            
                                    
            
            
                | 285 | 11 |  |                             (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y'])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 286 |  |  |                         ) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 287 |  |  |                             // vertical offset | 
            
                                                                                                            
                            
            
                                    
            
            
                | 288 | 7 |  |                             $text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 289 | 11 |  |                         } elseif (false !== $current_position_td['x'] && (float) $x > (float) ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 290 | 11 |  |                                 $current_position_td['x'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 291 |  |  |                             ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 292 |  |  |                         ) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 293 |  |  |                             // horizontal offset | 
            
                                                                                                            
                            
            
                                    
            
            
                | 294 | 8 |  |                             $text .= ' '; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 295 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 296 | 11 |  |                         $current_position_td = ['x' => $x, 'y' => $y]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 297 | 11 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 298 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 299 |  |  |                     // move text current point and set leading | 
            
                                                                                                            
                            
            
                                    
            
            
                | 300 | 14 |  |                     case 'TD': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 301 | 1 |  |                         $args = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 302 | 1 |  |                         $y = array_pop($args); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 303 | 1 |  |                         $x = array_pop($args); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 304 | 1 |  |                         if ((float) $y < 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 305 | 1 |  |                             $text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 306 |  |  |                         } elseif ((float) $x <= 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 307 |  |  |                             $text .= ' '; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 308 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 309 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 310 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 311 | 14 |  |                     case 'Tf': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 312 | 14 |  |                         list($id) = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 313 | 14 |  |                         $id = trim($id, '/'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 314 | 14 |  |                         if (null !== $page) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 315 | 14 |  |                             $new_font = $page->getFont($id); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 316 |  |  |                             // If an invalid font ID is given, do not update the font. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 317 |  |  |                             // This should theoretically never happen, as the PDF spec states for the Tf operator: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 318 |  |  |                             // "The specified font value shall match a resource name in the Font entry of the default resource dictionary" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 319 |  |  |                             // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 320 |  |  |                             // But we want to make sure that malformed PDFs do not simply crash. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 321 | 14 |  |                             if (null !== $new_font) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 322 | 13 |  |                                 $current_font = $new_font; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 323 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 324 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 325 | 14 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 326 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 327 | 14 |  |                     case 'Q': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 328 |  |  |                         // Use clip: restore font. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 329 | 3 |  |                         $current_font = $clipped_font; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 330 | 3 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 331 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 332 | 14 |  |                     case 'q': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 333 |  |  |                         // Use clip: save font. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 334 | 3 |  |                         $clipped_font = $current_font; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 335 | 3 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 336 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 337 | 14 |  |                     case "'": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 338 | 14 |  |                     case 'Tj': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 339 | 9 |  |                         $command[self::COMMAND] = [$command]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 340 |  |  |                         // no break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 341 | 13 |  |                     case 'TJ': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 342 | 14 |  |                         $sub_text = $current_font->decodeText($command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 343 | 14 |  |                         $text .= $sub_text; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 344 | 14 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 345 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 346 |  |  |                     // set leading | 
            
                                                                                                            
                            
            
                                    
            
            
                | 347 | 11 |  |                     case 'TL': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 348 | 1 |  |                         $text .= ' '; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 349 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 350 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 351 | 11 |  |                     case 'Tm': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 352 | 11 |  |                         $args = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 353 | 11 |  |                         $y = array_pop($args); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 354 | 11 |  |                         $x = array_pop($args); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 355 | 11 |  |                         if (false !== $current_position_tm['x']) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 356 | 11 |  |                             $delta = abs((float) $x - (float) ($current_position_tm['x'])); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 357 | 11 |  |                             if ($delta > 10) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 358 | 9 |  |                                 $text .= "\t"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 359 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 360 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 361 | 11 |  |                         if (false !== $current_position_tm['y']) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 362 | 11 |  |                             $delta = abs((float) $y - (float) ($current_position_tm['y'])); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 363 | 11 |  |                             if ($delta > 10) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 364 | 7 |  |                                 $text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 365 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 366 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 367 | 11 |  |                         $current_position_tm = ['x' => $x, 'y' => $y]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 368 | 11 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 369 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 370 |  |  |                     // set super/subscripting text rise | 
            
                                                                                                            
                            
            
                                    
            
            
                | 371 | 8 |  |                     case 'Ts': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 372 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 373 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 374 |  |  |                     // set word spacing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 375 | 8 |  |                     case 'Tw': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 376 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 377 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 378 |  |  |                     // set horizontal scaling | 
            
                                                                                                            
                            
            
                                    
            
            
                | 379 | 8 |  |                     case 'Tz': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 380 |  |  |                         $text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 381 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 382 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 383 |  |  |                     // move to start of next line | 
            
                                                                                                            
                            
            
                                    
            
            
                | 384 | 8 |  |                     case 'T*': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 385 | 2 |  |                         $text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 386 | 2 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 387 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 388 | 7 |  |                     case 'Da': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 389 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 390 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 391 | 7 |  |                     case 'Do': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 392 | 4 |  |                         if (null !== $page) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 393 | 4 |  |                             $args = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 394 | 4 |  |                             $id = trim(array_pop($args), '/ '); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 395 | 4 |  |                             $xobject = $page->getXObject($id); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 396 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 397 |  |  |                             // @todo $xobject could be an ElementXRef object, which would then throw an error | 
            
                                                                                                            
                            
            
                                    
            
            
                | 398 | 4 |  |                             if (\is_object($xobject) && $xobject instanceof self && !\in_array($xobject->getUniqueId(), self::$recursionStack)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 399 |  |  |                                 // Not a circular reference. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 400 | 4 |  |                                 $text .= $xobject->getText($page); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 401 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 402 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 403 | 4 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 404 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 405 | 5 |  |                     case 'rg': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 406 | 5 |  |                     case 'RG': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 407 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 408 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 409 | 5 |  |                     case 're': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 410 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 411 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 412 | 5 |  |                     case 'co': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 413 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 414 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 415 | 5 |  |                     case 'cs': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 416 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 417 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 418 | 5 |  |                     case 'gs': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 419 | 3 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 420 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 421 | 4 |  |                     case 'en': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 422 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 423 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 424 | 4 |  |                     case 'sc': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 425 | 4 |  |                     case 'SC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 426 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 427 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 428 | 4 |  |                     case 'g': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 429 | 4 |  |                     case 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 430 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 431 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 432 | 3 |  |                     case 'V': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 433 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 434 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 435 | 3 |  |                     case 'vo': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 436 | 3 |  |                     case 'Vo': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 437 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 438 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 439 |  |  |                     default: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 440 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 441 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 442 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 443 |  |  |             // Fix Hebrew and other right-to-left (RTL) languages by reversing the character order. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 444 |  |  |             // @see: https://github.com/smalot/pdfparser/issues/398 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 445 | 14 |  |             if ($reverse_text) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 446 | 1 |  |                 $chars = mb_str_split($text, 1, mb_internal_encoding()); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 447 | 1 |  |                 $text = implode('', array_reverse($chars)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 448 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 449 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 450 | 14 |  |             $result .= $text; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 451 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 452 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 453 | 16 |  |         return $result.' '; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 454 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 455 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 456 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 457 |  |  |      * @throws \Exception | 
            
                                                                                                            
                            
            
                                    
            
            
                | 458 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 459 | 5 |  |     public function getTextArray(?Page $page = null): array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 460 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 461 | 5 |  |         $text = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 462 | 5 |  |         $sections = $this->getSectionsText($this->content); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 463 | 5 |  |         $current_font = new Font($this->document, null, null, $this->config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 464 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 465 | 5 |  |         foreach ($sections as $section) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 466 | 5 |  |             $commands = $this->getCommandsText($section); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 467 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 468 | 5 |  |             foreach ($commands as $command) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 469 | 5 |  |                 switch ($command[self::OPERATOR]) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 470 |  |  |                     // set character spacing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 471 | 5 |  |                     case 'Tc': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 472 | 2 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 473 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 474 |  |  |                     // move text current point | 
            
                                                                                                            
                            
            
                                    
            
            
                | 475 | 5 |  |                     case 'Td': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 476 | 5 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 477 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 478 |  |  |                     // move text current point and set leading | 
            
                                                                                                            
                            
            
                                    
            
            
                | 479 | 5 |  |                     case 'TD': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 480 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 481 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 482 | 5 |  |                     case 'Tf': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 483 | 5 |  |                         if (null !== $page) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 484 | 5 |  |                             list($id) = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 485 | 5 |  |                             $id = trim($id, '/'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 486 | 5 |  |                             $current_font = $page->getFont($id); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 487 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 488 | 5 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 489 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 490 | 5 |  |                     case "'": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 491 | 5 |  |                     case 'Tj': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 492 | 4 |  |                         $command[self::COMMAND] = [$command]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 493 |  |  |                         // no break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 494 | 5 |  |                     case 'TJ': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 495 | 5 |  |                         $sub_text = $current_font->decodeText($command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 496 | 5 |  |                         $text[] = $sub_text; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 497 | 5 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 498 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 499 |  |  |                     // set leading | 
            
                                                                                                            
                            
            
                                    
            
            
                | 500 | 4 |  |                     case 'TL': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 501 | 3 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 502 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 503 | 4 |  |                     case 'Tm': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 504 | 3 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 505 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 506 |  |  |                     // set super/subscripting text rise | 
            
                                                                                                            
                            
            
                                    
            
            
                | 507 | 4 |  |                     case 'Ts': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 508 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 509 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 510 |  |  |                     // set word spacing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 511 | 4 |  |                     case 'Tw': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 512 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 513 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 514 |  |  |                     // set horizontal scaling | 
            
                                                                                                            
                            
            
                                    
            
            
                | 515 | 4 |  |                     case 'Tz': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 516 |  |  |                         //$text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 517 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 518 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 519 |  |  |                     // move to start of next line | 
            
                                                                                                            
                            
            
                                    
            
            
                | 520 | 4 |  |                     case 'T*': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 521 |  |  |                         //$text .= "\n"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 522 | 3 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 523 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 524 | 3 |  |                     case 'Da': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 525 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 526 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 527 | 3 |  |                     case 'Do': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 528 |  |  |                         if (null !== $page) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 529 |  |  |                             $args = preg_split('/\s/s', $command[self::COMMAND]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 530 |  |  |                             $id = trim(array_pop($args), '/ '); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 531 |  |  |                             if ($xobject = $page->getXObject($id)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 532 |  |  |                                 $text[] = $xobject->getText($page); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 533 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 534 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 535 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 536 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 537 | 3 |  |                     case 'rg': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 538 | 3 |  |                     case 'RG': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 539 | 2 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 540 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 541 | 3 |  |                     case 're': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 542 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 543 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 544 | 3 |  |                     case 'co': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 545 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 546 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 547 | 3 |  |                     case 'cs': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 548 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 549 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 550 | 3 |  |                     case 'gs': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 551 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 552 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 553 | 3 |  |                     case 'en': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 554 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 555 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 556 | 3 |  |                     case 'sc': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 557 | 3 |  |                     case 'SC': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 558 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 559 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 560 | 3 |  |                     case 'g': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 561 | 3 |  |                     case 'G': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 562 | 2 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 563 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 564 | 1 |  |                     case 'V': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 565 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 566 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 567 | 1 |  |                     case 'vo': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 568 | 1 |  |                     case 'Vo': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 569 |  |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 570 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 571 |  |  |                     default: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 572 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 573 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 574 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 575 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 576 | 5 |  |         return $text; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 577 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 578 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 579 | 24 |  |     public function getCommandsText(string $text_part, int &$offset = 0): array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 580 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 581 | 24 |  |         $commands = $matches = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 582 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 583 | 24 |  |         while ($offset < \strlen($text_part)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 584 | 24 |  |             $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 585 | 24 |  |             $char = $text_part[$offset]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 586 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 587 | 24 |  |             $operator = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 588 | 24 |  |             $type = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 589 | 24 |  |             $command = false; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 590 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 591 | 24 |  |             switch ($char) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 592 | 24 |  |                 case '/': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 593 | 24 |  |                     $type = $char; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 594 | 24 |  |                     if (preg_match( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 595 | 24 |  |                         '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 596 | 24 |  |                         substr($text_part, $offset), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 597 |  |  |                         $matches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 598 |  |  |                     ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 599 |  |  |                     ) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 600 | 24 |  |                         $operator = $matches[2]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 601 | 24 |  |                         $command = $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 602 | 24 |  |                         $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 603 | 7 |  |                     } elseif (preg_match( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 604 | 7 |  |                         '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 605 | 7 |  |                         substr($text_part, $offset), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 606 |  |  |                         $matches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 607 |  |  |                     ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 608 |  |  |                     ) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 609 | 7 |  |                         $operator = $matches[2]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 610 | 7 |  |                         $command = $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 611 | 7 |  |                         $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 612 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 613 | 24 |  |                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 614 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 615 | 24 |  |                 case '[': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 616 | 24 |  |                 case ']': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 617 |  |  |                     // array object | 
            
                                                                                                            
                            
            
                                    
            
            
                | 618 | 21 |  |                     $type = $char; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 619 | 21 |  |                     if ('[' == $char) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 620 | 21 |  |                         ++$offset; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 621 |  |  |                         // get elements | 
            
                                                                                                            
                            
            
                                    
            
            
                | 622 | 21 |  |                         $command = $this->getCommandsText($text_part, $offset); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 623 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 624 | 21 |  |                         if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 625 | 21 |  |                             $operator = trim($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 626 | 21 |  |                             $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 627 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 628 |  |  |                     } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 629 | 21 |  |                         ++$offset; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 630 | 21 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 631 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 632 | 21 |  |                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 633 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 634 | 24 |  |                 case '<': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 635 | 24 |  |                 case '>': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 636 |  |  |                     // angle-bracket delimited object (PDF hexadecimal string) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 637 | 10 |  |                     $type = $char; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 638 | 10 |  |                     ++$offset; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 639 | 10 |  |                     if ('<' == $char) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 640 | 10 |  |                         $strpos = strpos($text_part, '>', $offset); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 641 | 10 |  |                         $command = substr($text_part, $offset, ($strpos - $offset)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 642 | 10 |  |                         $offset = $strpos + 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 643 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 644 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 645 | 10 |  |                     if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 646 | 7 |  |                         $operator = trim($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 647 | 7 |  |                         $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 648 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 649 | 10 |  |                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 650 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 651 | 24 |  |                 case '(': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 652 | 24 |  |                 case ')': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 653 | 17 |  |                     ++$offset; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 654 | 17 |  |                     $type = $char; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 655 | 17 |  |                     $strpos = $offset; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 656 | 17 |  |                     if ('(' == $char) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 657 | 17 |  |                         $open_bracket = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 658 | 17 |  |                         while ($open_bracket > 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 659 | 17 |  |                             if (!isset($text_part[$strpos])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 660 |  |  |                                 break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 661 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 662 | 17 |  |                             $ch = $text_part[$strpos]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 663 | 17 |  |                             switch ($ch) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 664 | 17 |  |                                 case '\\': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 665 |  |  |                                  // REVERSE SOLIDUS (5Ch) (Backslash) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 666 |  |  |                                     // skip next character | 
            
                                                                                                            
                            
            
                                    
            
            
                | 667 | 11 |  |                                     ++$strpos; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 668 | 11 |  |                                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 669 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 670 | 17 |  |                                 case '(': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 671 |  |  |                                  // LEFT PARENTHESIS (28h) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 672 |  |  |                                     ++$open_bracket; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 673 |  |  |                                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 674 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 675 | 17 |  |                                 case ')': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 676 |  |  |                                  // RIGHT PARENTHESIS (29h) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 677 | 17 |  |                                     --$open_bracket; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 678 | 17 |  |                                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 679 |  |  |                             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 680 | 17 |  |                             ++$strpos; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 681 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 682 | 17 |  |                         $command = substr($text_part, $offset, ($strpos - $offset - 1)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 683 | 17 |  |                         $offset = $strpos; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 684 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 685 | 17 |  |                         if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 686 | 13 |  |                             $operator = $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 687 | 13 |  |                             $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 688 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 689 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 690 | 17 |  |                     break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 691 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 692 |  |  |                 default: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 693 | 24 |  |                     if ('ET' == substr($text_part, $offset, 2)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 694 | 1 |  |                         break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 695 | 24 |  |                     } elseif (preg_match( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 696 | 24 |  |                         '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 697 | 24 |  |                         substr($text_part, $offset), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 698 |  |  |                         $matches | 
            
                                                                                                            
                            
            
                                    
            
            
                | 699 |  |  |                     ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 700 |  |  |                     ) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 701 | 24 |  |                         $operator = trim($matches['id']); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 702 | 24 |  |                         $command = trim($matches['data']); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 703 | 24 |  |                         $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 704 | 19 |  |                     } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 705 | 18 |  |                         $type = 'n'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 706 | 18 |  |                         $command = trim($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 707 | 18 |  |                         $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 708 | 12 |  |                     } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 709 | 12 |  |                         $type = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 710 | 12 |  |                         $operator = $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 711 | 12 |  |                         $command = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 712 | 12 |  |                         $offset += \strlen($matches[0]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 713 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 714 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 715 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 716 | 24 |  |             if (false !== $command) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 717 | 24 |  |                 $commands[] = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 718 | 24 |  |                     self::TYPE => $type, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 719 | 24 |  |                     self::OPERATOR => $operator, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 720 | 24 |  |                     self::COMMAND => $command, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 721 |  |  |                 ]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 722 |  |  |             } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 723 | 21 |  |                 break; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 724 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 725 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 726 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 727 | 24 |  |         return $commands; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 728 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 729 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 730 | 36 |  |     public static function factory( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 731 |  |  |         Document $document, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 732 |  |  |         Header $header, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 733 |  |  |         ?string $content, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 734 |  |  |         ?Config $config = null | 
            
                                                                                                            
                            
            
                                    
            
            
                | 735 |  |  |     ): self { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 736 | 36 |  |         switch ($header->get('Type')->getContent()) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 737 | 36 |  |             case 'XObject': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 738 | 8 |  |                 switch ($header->get('Subtype')->getContent()) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 739 | 8 |  |                     case 'Image': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 740 | 3 |  |                         return new Image($document, $header, $config->getRetainImageContent() ? $content : null, $config); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 741 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 742 | 6 |  |                     case 'Form': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 743 | 6 |  |                         return new Form($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 744 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 745 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 746 |  |  |                 return new self($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 747 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 748 | 36 |  |             case 'Pages': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 749 | 35 |  |                 return new Pages($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 750 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 751 | 36 |  |             case 'Page': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 752 | 35 |  |                 return new Page($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 753 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 754 | 36 |  |             case 'Encoding': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 755 | 5 |  |                 return new Encoding($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 756 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 757 | 36 |  |             case 'Font': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 758 | 35 |  |                 $subtype = $header->get('Subtype')->getContent(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 759 | 35 |  |                 $classname = '\Smalot\PdfParser\Font\Font'.$subtype; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 760 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 761 | 35 |  |                 if (class_exists($classname)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 762 | 35 |  |                     return new $classname($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 763 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 764 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 765 |  |  |                 return new Font($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 766 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 767 |  |  |             default: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 768 | 36 |  |                 return new self($document, $header, $content, $config); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 769 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 770 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 771 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 772 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 773 |  |  |      * Returns unique id identifying the object. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 774 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 775 | 16 |  |     protected function getUniqueId(): string | 
            
                                                                                                            
                            
            
                                    
            
            
                | 776 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 777 | 16 |  |         return spl_object_hash($this); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 778 |  |  |     } | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 779 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 780 |  |  |  | 
            
                        
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress. Please note the @ignore annotation hint above.