1 | <?php |
||||||
2 | |||||||
3 | /** |
||||||
4 | * @file |
||||||
5 | * This file is part of the PdfParser library. |
||||||
6 | * |
||||||
7 | * @author Sébastien MALOT <[email protected]> |
||||||
8 | * |
||||||
9 | * @date 2017-01-03 |
||||||
10 | * |
||||||
11 | * @license LGPLv3 |
||||||
12 | * |
||||||
13 | * @url <https://github.com/smalot/pdfparser> |
||||||
14 | * |
||||||
15 | * PdfParser is a pdf library written in PHP, extraction oriented. |
||||||
16 | * Copyright (C) 2017 - Sébastien MALOT <[email protected]> |
||||||
17 | * |
||||||
18 | * This program is free software: you can redistribute it and/or modify |
||||||
19 | * it under the terms of the GNU Lesser General Public License as published by |
||||||
20 | * the Free Software Foundation, either version 3 of the License, or |
||||||
21 | * (at your option) any later version. |
||||||
22 | * |
||||||
23 | * This program is distributed in the hope that it will be useful, |
||||||
24 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
26 | * GNU Lesser General Public License for more details. |
||||||
27 | * |
||||||
28 | * You should have received a copy of the GNU Lesser General Public License |
||||||
29 | * along with this program. |
||||||
30 | * If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>. |
||||||
31 | */ |
||||||
32 | |||||||
33 | namespace Smalot\PdfParser; |
||||||
34 | |||||||
35 | use Smalot\PdfParser\Element\ElementArray; |
||||||
36 | use Smalot\PdfParser\Element\ElementMissing; |
||||||
37 | use Smalot\PdfParser\Element\ElementNull; |
||||||
38 | use Smalot\PdfParser\Element\ElementXRef; |
||||||
39 | |||||||
40 | class Page extends PDFObject |
||||||
41 | { |
||||||
42 | /** |
||||||
43 | * @var Font[] |
||||||
44 | */ |
||||||
45 | protected $fonts; |
||||||
46 | |||||||
47 | /** |
||||||
48 | * @var PDFObject[] |
||||||
49 | */ |
||||||
50 | protected $xobjects; |
||||||
51 | |||||||
52 | /** |
||||||
53 | * @var array |
||||||
54 | */ |
||||||
55 | protected $dataTm; |
||||||
56 | |||||||
/**
 * Assigns the font table of this page unless one is already present.
 *
 * The assignment is skipped as soon as $this->fonts holds a non-empty value,
 * so a table that was set or resolved earlier is never overwritten.
 *
 * @param array<\Smalot\PdfParser\Font> $fonts
 *
 * @internal
 */
public function setFonts($fonts)
{
    if (!empty($this->fonts)) {
        return;
    }

    $this->fonts = $fonts;
}
||||||
68 | |||||||
/**
 * Returns the fonts declared in the "Font" entry of the page resources.
 *
 * Every font is registered under its resource name and, when something is
 * left after stripping all characters other than digits, ".", "-" and "_",
 * under that cleaned name as well. The table is cached in $this->fonts once
 * it has been built; the empty results returned when the page has no usable
 * "Font" entry are not cached.
 *
 * @return Font[]
 */
public function getFonts()
{
    if (null !== $this->fonts) {
        return $this->fonts;
    }

    $resources = $this->get('Resources');

    // Nothing to resolve when the resources cannot be queried or hold no fonts.
    if (!method_exists($resources, 'has') || !$resources->has('Font')) {
        return [];
    }

    $fontResource = $resources->get('Font');

    if ($fontResource instanceof ElementMissing) {
        return [];
    }

    // The entry is either the dictionary itself or an object carrying it as header.
    $candidates = $fontResource instanceof Header
        ? $fontResource->getElements()
        : $fontResource->getHeader()->getElements();

    $lookup = [];

    foreach ($candidates as $name => $candidate) {
        if (!$candidate instanceof Font) {
            continue;
        }

        $lookup[$name] = $candidate;

        // Register the font under its cleaned name too (digits, ".", "-", "_" only).
        $cleanedName = preg_replace('/[^0-9\.\-_]/', '', $name);
        if ('' != $cleanedName) {
            $lookup[$cleanedName] = $candidate;
        }
    }

    $this->fonts = $lookup;

    return $lookup;
}
||||||
110 | |||||||
/**
 * Looks up a font of this page by its resource name.
 *
 * The name is tried exactly as given first. Only when that fails, every
 * character other than digits, ".", "-" and "_" is stripped and the remainder
 * is tried, which matches the cleaned aliases registered by getFonts().
 *
 * @param string $id font resource name, e.g. the first operand of a Tf operator
 *
 * @return Font|null the matching font, or null when neither form of the name is known
 */
public function getFont(string $id): ?Font
{
    $fonts = $this->getFonts();

    if (isset($fonts[$id])) {
        return $fonts[$id];
    }

    // According to the PDF specs (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 238)
    // "The font resource name presented to the Tf operator is arbitrary, as are the names for all kinds of resources".
    // The unfiltered name was therefore searched first; the cleaned name is only a fallback.
    $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);

    if (isset($fonts[$cleanedId])) {
        return $fonts[$cleanedId];
    }

    return null;
}
||||||
134 | |||||||
/**
 * Returns the XObjects declared in the "XObject" entry of the page resources.
 *
 * Every XObject is registered under its resource name and, when something is
 * left after stripping all characters other than digits, ".", "-" and "_",
 * under that cleaned name as well. The table is cached in $this->xobjects once
 * it has been built; the empty results returned when the page has no usable
 * "XObject" entry are not cached.
 *
 * @return PDFObject[]
 */
public function getXObjects()
{
    if (null !== $this->xobjects) {
        return $this->xobjects;
    }

    $resources = $this->get('Resources');

    // Nothing to resolve when the resources cannot be queried or hold no XObjects.
    if (!method_exists($resources, 'has') || !$resources->has('XObject')) {
        return [];
    }

    $xobjectResource = $resources->get('XObject');

    // Same guard as in getFonts(): an entry that is present but resolves to
    // ElementMissing must not reach the getHeader() call below.
    if ($xobjectResource instanceof ElementMissing) {
        return [];
    }

    // The entry is either the dictionary itself or an object carrying it as header.
    $xobjects = $xobjectResource instanceof Header
        ? $xobjectResource->getElements()
        : $xobjectResource->getHeader()->getElements();

    $table = [];

    foreach ($xobjects as $id => $xobject) {
        $table[$id] = $xobject;

        // Register the XObject under its cleaned name too (digits, ".", "-", "_" only).
        $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);
        if ('' != $cleanedId) {
            $table[$cleanedId] = $xobject;
        }
    }

    return $this->xobjects = $table;
}
||||||
172 | |||||||
/**
 * Looks up an XObject of this page by its resource name.
 *
 * Only the name exactly as given is searched in the table returned by
 * getXObjects(); no cleaned-name fallback is applied here.
 *
 * @param string $id XObject resource name
 *
 * @return PDFObject|null the matching XObject, or null when the name is unknown
 */
public function getXObject(string $id): ?PDFObject
{
    return $this->getXObjects()[$id] ?? null;
}
||||||
190 | |||||||
/**
 * Extracts the text of this page from its "Contents" entry.
 *
 * - A missing or null entry yields an empty string.
 * - A single object whose header elements start with a numeric key is treated
 *   as a list of content parts: the parts are concatenated (references are
 *   resolved through getObject()) into one virtual object.
 * - An ElementArray is merged the same way, with a newline after each part.
 * - Any other single object is parsed as it is.
 *
 * @param self|null $page not used by this implementation; the page itself is
 *                        always handed to the content object
 *
 * @return string the text of the page, or '' when there is no content
 */
public function getText(?self $page = null): string
{
    $contents = $this->get('Contents');

    if (!$contents || $contents instanceof ElementMissing || $contents instanceof ElementNull) {
        return '';
    }

    if ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            $new_content = '';

            foreach ($elements as $element) {
                if ($element instanceof ElementXRef) {
                    $new_content .= $element->getObject()->getContent();
                } else {
                    $new_content .= $element->getContent();
                }
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }

        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $new_content, $this->config);
    }

    /*
     * Elements referencing each other on the same page can cause endless loops during text parsing.
     * To combat this a recursionStack containing already parsed elements of the page is kept.
     * The stack is emptied here once the page is done. This happens in a finally block so that
     * an exception thrown while parsing cannot leave stale entries behind for the next page.
     */
    try {
        return $contents->getText($this);
    } finally {
        PDFObject::$recursionStack = [];
    }
}
||||||
240 | |||||||
/**
 * Tells whether the document of this page was produced by FPDF/FPDI (setasign\Fpdi\Fpdi).
 *
 * The decision is based on the 'Producer' entry of the document details:
 * it has to be a string whose first four characters are "FPDF"
 * (FPDF writes "FPDF" . FPDF_VERSION there).
 *
 * @return bool true if the current page belongs to a FPDI/FPDF document
 */
public function isFpdf(): bool
{
    $details = $this->document->getDetails();

    if (!\array_key_exists('Producer', $details)) {
        return false;
    }

    $producer = $details['Producer'];

    return \is_string($producer) && 0 === strncmp($producer, 'FPDF', 4);
}
||||||
259 | |||||||
/**
 * Returns the position of this page inside the list of pages of its document.
 *
 * The value is the zero-based index at which the document's page list holds
 * this very object. When the page is not part of the list, the number of
 * pages is returned instead.
 *
 * @return int the zero-based page index
 */
public function getPageNumber(): int
{
    $pages = $this->document->getPages();
    $total = \count($pages);

    $index = 0;
    while ($index < $total && $pages[$index] !== $this) {
        ++$index;
    }

    return $index;
}
||||||
277 | |||||||
/**
 * Returns the XObject that holds the content of this page in a FPDF/FPDI document.
 *
 * The object is taken from the XObject table of the page, using the value of
 * getPageNumber() as key.
 * NOTE(review): presumably FPDF/FPDI names its page templates so that the
 * cleaned id equals the page index - confirm against real FPDI output.
 *
 * @return PDFObject The PDFObject for the page
 */
public function getPDFObjectForFpdf(): PDFObject
{
    $index = $this->getPageNumber();

    return $this->getXObjects()[$index];
}
||||||
293 | |||||||
/**
 * Builds a new PDFObject from the FPDF/FPDI object of this page.
 *
 * The new object reuses the document, header, content and config of the
 * object returned by getPDFObjectForFpdf().
 *
 * @return PDFObject The PDFObject
 */
public function createPDFObjectForFpdf(): PDFObject
{
    $template = $this->getPDFObjectForFpdf();
    $content = $template->getContent();
    $header = $template->getHeader();

    return new PDFObject($template->document, $header, $content, $template->config);
}
||||||
311 | |||||||
/**
 * Builds a new Page from the FPDF/FPDI object of this page.
 *
 * The new page reuses the document, header, content and config of the
 * object returned by getPDFObjectForFpdf().
 *
 * @return Page The page
 */
public function createPageForFpdf(): self
{
    $template = $this->getPDFObjectForFpdf();
    $content = $template->getContent();
    $header = $template->getHeader();

    return new self($template->document, $header, $content, $template->config);
}
||||||
326 | |||||||
/**
 * Extracts the text of this page as an array of text fragments.
 *
 * For FPDF/FPDI documents the work is delegated to a fresh copy of the
 * page's FPDF object. Otherwise the "Contents" entry is used:
 *
 * - A missing or null entry yields an empty array.
 * - A single object whose header elements start with a numeric key is treated
 *   as a list of content parts: the parts are concatenated (references are
 *   resolved through getObject()) into one virtual object.
 * - Any other single object is parsed with this page as context; if that
 *   throws, it is parsed again without a page.
 * - An ElementArray is merged into one virtual object, with a newline after each part.
 *
 * @param self|null $page not used by this implementation
 *
 * @return array the text fragments, or [] when there is no content
 */
public function getTextArray(?self $page = null): array
{
    if ($this->isFpdf()) {
        $pdfObject = $this->getPDFObjectForFpdf();
        $newPdfObject = $this->createPDFObjectForFpdf();

        return $newPdfObject->getTextArray($pdfObject);
    }

    $contents = $this->get('Contents');

    if (!$contents || $contents instanceof ElementMissing || $contents instanceof ElementNull) {
        return [];
    }

    if ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            $new_content = '';

            /** @var PDFObject $element */
            foreach ($elements as $element) {
                if ($element instanceof ElementXRef) {
                    $new_content .= $element->getObject()->getContent();
                } else {
                    $new_content .= $element->getContent();
                }
            }

            $header = new Header([], $this->document);
            $contents = new PDFObject($this->document, $header, $new_content, $this->config);
        } else {
            // Return the guarded result directly. Previously it was thrown away and
            // the same call was repeated outside the try block, which parsed the
            // content twice and let a failure of the second call escape the fallback.
            try {
                return $contents->getTextArray($this);
            } catch (\Throwable $e) {
                // Best effort: retry without the page context.
                return $contents->getTextArray();
            }
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        /** @var PDFObject $content */
        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }

        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $new_content, $this->config);
    }

    return $contents->getTextArray($this);
}
||||||
383 | |||||||
/**
 * Gets all the text data of the page together with its internal representation.
 *
 * The content stream is split with getSectionsText() and every section is
 * broken down with getCommandsText(); the resulting commands are returned as
 * one flat list.
 *
 * When the content of the "Contents" entry is an array, its parts are
 * concatenated and parsed by this page. Otherwise the content object itself
 * does the parsing; for FPDF/FPDI documents it is replaced by the page's
 * FPDF object first.
 *
 * @return array the commands of the page, in stream order
 */
public function extractRawData(): array
{
    $content = $this->get('Contents');
    $values = $content->getContent();

    if (\is_array($values)) {
        // Several content parts: glue them together and let the page parse them.
        $text = '';
        foreach ($values as $section) {
            $text .= $section->getContent();
        }
        $parser = $this;
    } else {
        if ($this->isFpdf()) {
            $content = $this->getPDFObjectForFpdf();
        }
        $parser = $content;
        $text = $content->getContent();
    }

    $extractedData = [];

    foreach ($parser->getSectionsText($text) as $sectionText) {
        foreach ($parser->getCommandsText($sectionText) as $command) {
            $extractedData[] = $command;
        }
    }

    return $extractedData;
}
||||||
424 | |||||||
/**
 * Gets all the decoded text data with its internal representation from a page.
 *
 * The commands are walked in order while the current font is tracked:
 *
 * - Tf / TF select the font named by the first operand (looked up on the FPDF
 *   page for FPDF/FPDI documents, on this page otherwise).
 * - q pushes the current font on a stack and Q pops it again, so nested
 *   save/restore pairs each get their own font back. A Q without a matching q
 *   leaves the current font untouched.
 * - Tj / TJ have their text operand(s) decoded with the current font:
 *   octal decoding, unescaping of backslash sequences, conversion from
 *   ISO-8859-1 to UTF-8 and finally the font's own content decoding.
 *   For TJ only the entries at even positions are decoded.
 *
 * @param array|null $extractedRawData the extracted data returned by extractRawData, or
 *                                     null (or an empty array) if extractRawData should be called
 *
 * @return array An array with the data and the internal representation
 */
public function extractDecodedRawData(?array $extractedRawData = null): array
{
    if (empty($extractedRawData)) {
        $extractedRawData = $this->extractRawData();
    }

    /** @var Font|null $currentFont */
    $currentFont = null;

    // Fonts saved by "q", restored in reverse order by "Q".
    $savedFonts = [];

    // If the document is a FPDI/FPDF one, this page holds the correct fonts.
    $fpdfPage = $this->isFpdf() ? $this->createPageForFpdf() : null;

    // Shared tail of the decoding: unescape, convert to UTF-8, then let the font decode.
    $finishDecoding = static function ($text, ?Font $font) {
        $text = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
            ['\\', '(', ')', "\n", "\r", "\t", ' '],
            $text
        );
        $text = mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');

        return null !== $font ? $font->decodeContent($text) : $text;
    };

    foreach ($extractedRawData as &$command) {
        $operator = $command['o'];

        if ('Tj' == $operator || 'TJ' == $operator) {
            $data = $command['c'];

            if (!\is_array($data)) {
                // Without a current font a plain string operand is replaced by ''.
                $text = null !== $currentFont ? $currentFont->decodeOctal($data) : '';
                $command['c'] = $finishDecoding($text, $currentFont);
                continue;
            }

            // Only the entries at even positions are decoded; odd ones are left as they are.
            $numText = \count($data);
            for ($i = 0; $i < $numText; $i += 2) {
                $text = $data[$i]['c'];
                if (null !== $currentFont) {
                    $text = $currentFont->decodeOctal($text);
                }
                $command['c'][$i]['c'] = $finishDecoding($text, $currentFont);
            }
        } elseif ('Tf' == $operator || 'TF' == $operator) {
            $fontId = explode(' ', $command['c'])[0];
            $currentFont = null !== $fpdfPage ? $fpdfPage->getFont($fontId) : $this->getFont($fontId);
        } elseif ('q' == $operator) {
            $savedFonts[] = $currentFont;
        } elseif ('Q' == $operator) {
            // An unbalanced Q has nothing to restore; keep the current font.
            if ([] !== $savedFonts) {
                $currentFont = array_pop($savedFonts);
            }
        }
    }
    // Break the reference so later use of $command cannot alter the last entry.
    unset($command);

    return $extractedRawData;
}
||||||
500 | |||||||
/**
 * Gets just the text commands that are involved in text positions and
 * the Text Matrix (Tm).
 *
 * It keeps only the PDF commands that deal with text positioning, font
 * selection and text showing; every other command is dropped. The relative
 * order of the kept commands is preserved.
 *
 * @param array|null $extractedDecodedRawData The data extracted by extractDecodedRawData.
 *                                            If it is null or empty, extractDecodedRawData is called.
 *
 * @return array An array with the text commands of the page
 */
public function getDataCommands(?array $extractedDecodedRawData = null): array
{
    if (empty($extractedDecodedRawData)) {
        $extractedDecodedRawData = $this->extractDecodedRawData();
    }

    $keptOperators = [
        // BT: begin a text object, initializing Tm and Tlm to the identity matrix.
        'BT',
        // ET: end a text object, discarding the text matrix.
        'ET',
        // leading TL: set the text leading, Tl, used by the T*, ' and " operators (initial value 0).
        'TL',
        // tx ty Td: move to the start of the next line, offset from the start of the
        // current line by (tx, ty).
        'Td',
        // tx ty TD: same as Td, but also sets the leading; equivalent to "-ty TL" followed by "tx ty Td".
        'TD',
        // a b c d e f Tm: set the text matrix, Tm, and the text line matrix, Tlm
        // (initial value: identity matrix [1 0 0 1 0 0]).
        'Tm',
        // T*: move to the start of the next line; equivalent to "0 Tl Td" with the current leading Tl.
        'T*',
        // string Tj: show a text string.
        'Tj',
        // string ': move to the next line and show a text string; equivalent to "T*" followed by "string Tj".
        "'",
        // aw ac string ": move to the next line and show a text string, using aw as word
        // spacing (Tw) and ac as character spacing (Tc); equivalent to "aw Tw", "ac Tc", "string '".
        '"',
        // Tf / TF: font selection.
        'Tf',
        'TF',
        // array TJ: show one or more text strings with individual glyph positioning. String
        // elements are shown; number elements translate the text matrix, the amount being
        // subtracted from the current horizontal or vertical coordinate depending on the writing mode.
        'TJ',
    ];

    $extractedData = [];

    foreach ($extractedDecodedRawData as $command) {
        // in_array() runs in its default, non-strict mode so operators are matched with == semantics.
        if (\in_array($command['o'], $keptOperators)) {
            $extractedData[] = $command;
        }
    }

    return $extractedData;
}
||||||
649 | |||||||
/**
 * Gets the Text Matrix of the text in the page
 *
 * Walks the data commands of the page and tracks the text matrix (Tm), the
 * text leading (Tl) and the current font while doing so. Every time a text
 * showing operator (Tj, TJ, ' or ") is met, one entry is appended to the result.
 *
 * Each entry is an array whose first item is the Text Matrix (Tm) and whose
 * second item is a string with the text data. The Text Matrix is an array of
 * 6 numbers (kept as strings). The last 2 numbers are the X and Y coordinates
 * of the text. The first 4 numbers deal with scaling, rotation and skew of
 * the text.
 *
 * For the Tj and TJ operators, when a config is available and its
 * getDataTmFontInfoHasToBeIncluded() returns a truthy value, the font id and
 * the font size are appended as third and fourth item of the entry.
 * NOTE(review): entries produced by the ' and " operators never carry the
 * font information - confirm whether this asymmetry is intended.
 *
 * The result is also stored in $this->dataTm.
 *
 * @param array $dataCommands the data extracted by getDataCommands
 *                            if null (or empty) getDataCommands is called
 *
 * @return array an array with the data of the page including the Tm information
 *               of any text in the page
 */
public function getDataTm(?array $dataCommands = null): array
{
    if (empty($dataCommands)) {
        $dataCommands = $this->getDataCommands();
    }

    /*
     * At the beginning of a text object Tm is the identity matrix
     */
    $defaultTm = ['1', '0', '0', '1', '0', '0'];

    /*
     * Set the text leading used by T*, ' and " operators
     */
    $defaultTl = 0;

    /*
     * Set default values for font data
     */
    $defaultFontId = -1;
    $defaultFontSize = 1;

    /*
     * Indexes of horizontal/vertical scaling and X,Y-coordinates in the matrix (Tm)
     */
    $hSc = 0; // horizontal scaling
    /**
     * index of vertical scaling in the array that encodes the text matrix.
     * for more information: https://github.com/smalot/pdfparser/pull/559#discussion_r1053415500
     */
    $vSc = 3;
    $x = 4;
    $y = 5;

    /*
     * x,y-coordinates of text space origin in user units
     *
     * These will be assigned the value of the currently printed string
     */
    $Tx = 0;
    $Ty = 0;

    $Tm = $defaultTm;
    $Tl = $defaultTl;
    $fontId = $defaultFontId;
    $fontSize = $defaultFontSize; // reflects fontSize set by Tf or Tfs

    /*
     * The config is optional: without this guard a page built without a
     * config would trigger a fatal "call to a member function on null" on
     * the first Tj/TJ operator. The flag does not change while looping, so
     * it is read once.
     */
    $includeFontInfo = null !== $this->config
        && $this->config->getDataTmFontInfoHasToBeIncluded();

    $extractedTexts = $this->getTextArray();
    $extractedData = [];
    foreach ($dataCommands as $command) {
        // If we've used up all the texts from getTextArray(), exit
        // so we aren't accessing non-existent array indices
        // Fixes 'undefined array key' errors in Issues #575, #576
        if (\count($extractedTexts) <= \count($extractedData)) {
            break;
        }
        $currentText = $extractedTexts[\count($extractedData)];
        switch ($command['o']) {
            /*
             * BT
             * Begin a text object, initializing the Tm and Tlm to identity matrix
             */
            case 'BT':
                $Tm = $defaultTm;
                $Tl = $defaultTl;
                $Tx = 0;
                $Ty = 0;
                break;

            /*
             * ET
             * End a text object
             */
            case 'ET':
                break;

            /*
             * text leading TL
             * Set the text leading, Tl, to leading. Tl is used by the T*, ' and " operators.
             * Initial value: 0
             */
            case 'TL':
                // scaled text leading
                $Tl = (float) $command['c'] * (float) $Tm[$vSc];
                break;

            /*
             * tx ty Td
             * Move to the start of the next line, offset from the start of the
             * current line by tx, ty.
             */
            case 'Td':
                $coord = explode(' ', $command['c']);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * tx ty TD
             * Move to the start of the next line, offset from the start of the
             * current line by tx, ty. As a side effect, this operator sets the leading
             * parameter in the text state. This operator has the same effect as the
             * code:
             * -ty TL
             * tx ty Td
             */
            case 'TD':
                $coord = explode(' ', $command['c']);
                $Tl = -((float) $coord[1] * (float) $Tm[$vSc]);
                $Tx += (float) $coord[0] * (float) $Tm[$hSc];
                $Ty += (float) $coord[1] * (float) $Tm[$vSc];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * a b c d e f Tm
             * Set the text matrix, Tm, and the text line matrix, Tlm. The operands are
             * all numbers, and the initial value for Tm and Tlm is the identity matrix
             * [1 0 0 1 0 0]
             */
            case 'Tm':
                $Tm = explode(' ', $command['c']);
                $Tx = (float) $Tm[$x];
                $Ty = (float) $Tm[$y];
                break;

            /*
             * T*
             * Move to the start of the next line. This operator has the same effect
             * as the code:
             * 0 Tl Td
             * Where Tl is the current leading parameter in the text state.
             */
            case 'T*':
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                break;

            /*
             * string Tj
             * Show a Text String
             */
            case 'Tj':
                $data = [$Tm, $currentText];
                if ($includeFontInfo) {
                    $data[] = $fontId;
                    $data[] = $fontSize;
                }
                $extractedData[] = $data;
                break;

            /*
             * string '
             * Move to the next line and show a text string. This operator has the
             * same effect as the code:
             * T*
             * string Tj
             */
            case "'":
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                $extractedData[] = [$Tm, $currentText];
                break;

            /*
             * aw ac string "
             * Move to the next line and show a text string, using aw as the word
             * spacing and ac as the character spacing. This operator has the same
             * effect as the code:
             * aw Tw
             * ac Tc
             * string '
             * Tw sets the word spacing, Tw, to wordSpace.
             * Tc sets the character spacing, Tc, to charsSpace.
             */
            case '"':
                $data = explode(' ', $currentText);
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                // The third token is used as the text; when the text holds
                // fewer than three tokens, keep the whole text instead of
                // reading a non-existent offset.
                $extractedData[] = [$Tm, $data[2] ?? $currentText];
                break;

            case 'Tf':
                /*
                 * From PDF 1.0 specification, page 106:
                 * fontname size Tf Set font and size
                 * Sets the text font and text size in the graphics state. There is no default value for
                 * either fontname or size; they must be selected using Tf before drawing any text.
                 * fontname is a resource name. size is a number expressed in text space units.
                 *
                 * Source: https://ia902503.us.archive.org/10/items/pdfy-0vt8s-egqFwDl7L2/PDF%20Reference%201.0.pdf
                 * Introduced with https://github.com/smalot/pdfparser/pull/516
                 */
                list($fontId, $fontSize) = explode(' ', $command['c'], 2);
                break;

            /*
             * array TJ
             * Show one or more text strings allowing individual glyph positioning.
             * Each element of the array can be a string or a number. If the element is
             * a string, this operator shows the string. If it is a number, the
             * operator adjusts the text position by that amount; that is, it translates
             * the text matrix, Tm. This amount is subtracted from the current
             * horizontal or vertical coordinate, depending on the writing mode.
             * In the default coordinate system, a positive adjustment has the effect
             * of moving the next glyph painted either to the left or down by the given
             * amount.
             */
            case 'TJ':
                $data = [$Tm, $currentText];
                if ($includeFontInfo) {
                    $data[] = $fontId;
                    $data[] = $fontSize;
                }
                $extractedData[] = $data;
                break;

            default:
                // every other operator has no effect on the tracked text state
                break;
        }
    }
    $this->dataTm = $extractedData;

    return $extractedData;
}
||||||
890 | |||||||
/**
 * Gets the text data located around the given coordinates (X,Y)
 *
 * Every entry of $this->dataTm (computed through getDataTm() when it is not
 * set or empty) is inspected: the X and Y position of the entry are read
 * from index 4 and 5 of its text matrix and compared with the requested
 * coordinates. The entries returned by getDataTm() can be used to find out
 * the coordinates of a given text.
 *
 * @param float $x      The X value of the coordinate to search for. If null
 *                      just the Y value is considered (same row)
 * @param float $y      The Y value of the coordinate to search for. If null
 *                      just the X value is considered (same column)
 * @param float $xError The tolerance, below or above $x, for an X to be "near"
 * @param float $yError The tolerance, below or above $y, for a Y to be "near"
 *
 * @return array An array of [Tm, text] pairs that are near the given coordinates.
 *               If no text is "near" the x,y coordinate, or if both x and y
 *               are null, an empty array is returned.
 */
public function getTextXY(?float $x = null, ?float $y = null, float $xError = 0, float $yError = 0): array
{
    // Lazily compute the text matrix data on first use
    if (empty($this->dataTm)) {
        $this->getDataTm();
    }

    // Nothing to search for without at least one coordinate
    if (null === $x && null === $y) {
        return [];
    }

    $nearby = [];
    foreach ($this->dataTm as $entry) {
        $matrix = $entry[0];
        $posX = (float) $matrix[4];
        $posY = (float) $matrix[5];
        $string = $entry[1];

        // A null coordinate places no constraint on its axis
        $xIsNear = null === $x
            || ($posX >= ($x - $xError) && $posX <= ($x + $xError));
        $yIsNear = null === $y
            || ($posY >= ($y - $yError) && $posY <= ($y + $yError));

        if ($xIsNear && $yIsNear) {
            $nearby[] = [$matrix, $string];
        }
    }

    return $nearby;
}
||||||
961 | } |
||||||
962 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.