Test Failed
Pull Request — master (#457)
by
unknown
01:37
created

Page::getTextXY()   D

Complexity

Conditions 18
Paths 104

Size

Total Lines 51
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 25
CRAP Score 24.009

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 18
eloc 33
c 2
b 0
f 0
nc 104
nop 4
dl 0
loc 51
ccs 25
cts 34
cp 0.7352
crap 24.009
rs 4.8333

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\Element\ElementArray;
34
use Smalot\PdfParser\Element\ElementMissing;
35
use Smalot\PdfParser\Element\ElementNull;
36
use Smalot\PdfParser\Element\ElementXRef;
37
38
class Page extends PDFObject
39
{
40
    /**
41
     * @var Font[]|null Font table built by getFonts(); null until it has been built
42
     */
43
    protected $fonts = null;
44
45
    /**
46
     * @var PDFObject[]|null XObject table built by getXObjects(); null until it has been built
47
     */
48
    protected $xobjects = null;
49
50
    /**
51
     * @var array|null Result of the last getDataTm() call; null until it has been called
52
     */
53
    protected $dataTm = null;
54
55
    /**
     * Returns the fonts declared under the "Font" entry of this page's "Resources".
     *
     * Every font is indexed by its resource name and, in addition, by that name
     * reduced to the characters 0-9, ".", "-" and "_" (when the reduced name is
     * not empty). The table is built once and then served from $this->fonts.
     * The empty arrays returned when no usable "Font" entry exists are not cached.
     *
     * @return Font[]
     */
    public function getFonts()
    {
        if (null !== $this->fonts) {
            return $this->fonts;
        }

        $resources = $this->get('Resources');

        // Without a has() method there is no way to ask for the "Font" entry.
        if (!method_exists($resources, 'has') || !$resources->has('Font')) {
            return [];
        }

        $fontEntry = $resources->get('Font');
        if ($fontEntry instanceof ElementMissing) {
            return [];
        }

        // A Header exposes its elements directly; anything else is asked for its header first.
        $candidates = $fontEntry instanceof Header
            ? $fontEntry->getElements()
            : $fontEntry->getHeader()->getElements();

        $fontsByName = [];
        foreach ($candidates as $name => $candidate) {
            if (!$candidate instanceof Font) {
                continue;
            }

            $fontsByName[$name] = $candidate;

            // Register the same font a second time under the reduced name.
            $reducedName = (string) preg_replace('/[^0-9\.\-_]/', '', $name);
            if ('' !== $reducedName) {
                $fontsByName[$reducedName] = $candidate;
            }
        }

        $this->fonts = $fontsByName;

        return $fontsByName;
    }
96
97 21
    /**
     * Looks up a font of this page by its resource name.
     *
     * The name is first tried exactly as given. If that fails, it is reduced to
     * the characters 0-9, ".", "-" and "_" and tried again, because getFonts()
     * also indexes every font under its reduced name.
     *
     * @param string $id font resource name, as passed to the Tf operator
     *
     * @return Font|null the matching font, or null when neither lookup succeeds
     */
    public function getFont(string $id): ?Font
    {
        $fonts = $this->getFonts();

        // Resource names are arbitrary, so the unfiltered name takes precedence.
        if (isset($fonts[$id])) {
            return $fonts[$id];
        }

        // Fallback: retry with the reduced name. The cast keeps the array offset
        // a string even if preg_replace() reports an error by returning null.
        $reducedId = (string) preg_replace('/[^0-9\.\-_]/', '', $id);

        return $fonts[$reducedId] ?? null;
    }
120
121
    /**
     * Returns the objects declared under the "XObject" entry of this page's "Resources".
     *
     * Every object is indexed by its resource name and, in addition, by that
     * name reduced to the characters 0-9, ".", "-" and "_" (when the reduced
     * name is not empty). The table is built once and then served from
     * $this->xobjects. The empty array returned when there is no "XObject"
     * entry is not cached.
     *
     * @return PDFObject[]
     */
    public function getXObjects()
    {
        if (null !== $this->xobjects) {
            return $this->xobjects;
        }

        $resources = $this->get('Resources');

        // Without a has() method there is no way to ask for the "XObject" entry.
        if (!method_exists($resources, 'has') || !$resources->has('XObject')) {
            return [];
        }

        $xobjectEntry = $resources->get('XObject');

        // A Header exposes its elements directly; anything else is asked for its header first.
        $entries = $xobjectEntry instanceof Header
            ? $xobjectEntry->getElements()
            : $xobjectEntry->getHeader()->getElements();

        $xobjectsByName = [];
        foreach ($entries as $name => $entry) {
            $xobjectsByName[$name] = $entry;

            // Register the same object a second time under the reduced name.
            $reducedName = (string) preg_replace('/[^0-9\.\-_]/', '', $name);
            if ('' !== $reducedName) {
                $xobjectsByName[$reducedName] = $entry;
            }
        }

        $this->xobjects = $xobjectsByName;

        return $xobjectsByName;
    }
158
159 4
    /**
     * Looks up an XObject of this page by its resource name.
     *
     * Only the name exactly as given is tried. Unlike getFont(), there is no
     * second attempt with the name reduced to digits and ".", "-", "_".
     *
     * @param string $id XObject resource name
     *
     * @return PDFObject|null the matching object, or null when the name is unknown
     */
    public function getXObject(string $id): ?PDFObject
    {
        $available = $this->getXObjects();

        return $available[$id] ?? null;
    }
176
177 13
    /**
     * Extracts the text of this page from its "Contents" entry.
     *
     * When "Contents" is made of several parts (a PDFObject whose header
     * elements have numeric keys, or an ElementArray), the parts are
     * concatenated into one temporary PDFObject, which is then asked for its text.
     *
     * @param self|null $page accepted for signature compatibility; not read by this method
     *
     * @return string the page text, or '' when there is no usable "Contents"
     */
    public function getText(?self $page = null): string
    {
        if ($contents = $this->get('Contents')) {
            if ($contents instanceof ElementMissing) {
                return '';
            } elseif ($contents instanceof ElementNull) {
                return '';
            } elseif ($contents instanceof PDFObject) {
                $elements = $contents->getHeader()->getElements();

                // A numeric first key means the header holds a list of content parts.
                if (is_numeric(key($elements))) {
                    $new_content = '';

                    foreach ($elements as $element) {
                        if ($element instanceof ElementXRef) {
                            // Cross references are followed to the object they point to.
                            $new_content .= $element->getObject()->getContent();
                        } else {
                            $new_content .= $element->getContent();
                        }
                    }

                    $header = new Header([], $this->document);
                    $contents = new PDFObject($this->document, $header, $new_content, $this->config);
                }
            } elseif ($contents instanceof ElementArray) {
                // Create a virtual global content, one part per line.
                $new_content = '';

                foreach ($contents->getContent() as $content) {
                    $new_content .= $content->getContent()."\n";
                }

                $header = new Header([], $this->document);
                $contents = new PDFObject($this->document, $header, $new_content, $this->config);
            }

            $contentsText = $contents->getText($this);

            // Clear the shared recursion stack once the extraction is over.
            PDFObject::$recursionStack = [];

            return $contentsText;
        }

        return '';
    }
221 4
222 4
    /**
     * Extracts the text of this page as an array, from its "Contents" entry.
     *
     * When "Contents" is made of several parts (a PDFObject whose header
     * elements have numeric keys, or an ElementArray), the parts are
     * concatenated into one temporary PDFObject, which is then asked for its
     * text array.
     *
     * @param self|null $page accepted for signature compatibility; not read by this method
     *
     * @return array the text array, or [] when there is no usable "Contents"
     */
    public function getTextArray(?self $page = null): array
    {
        if ($contents = $this->get('Contents')) {
            if ($contents instanceof ElementMissing) {
                return [];
            } elseif ($contents instanceof ElementNull) {
                return [];
            } elseif ($contents instanceof PDFObject) {
                $elements = $contents->getHeader()->getElements();

                // A numeric first key means the header holds a list of content parts.
                if (is_numeric(key($elements))) {
                    $new_content = '';

                    /** @var PDFObject $element */
                    foreach ($elements as $element) {
                        if ($element instanceof ElementXRef) {
                            // Cross references are followed to the object they point to.
                            $new_content .= $element->getObject()->getContent();
                        } else {
                            $new_content .= $element->getContent();
                        }
                    }

                    $header = new Header([], $this->document);
                    $contents = new PDFObject($this->document, $header, $new_content, $this->config);
                } else {
                    // Single content object: extract with this page as context and,
                    // if that throws, retry without a page. The guarded result is
                    // returned directly so the content is parsed only once.
                    try {
                        return $contents->getTextArray($this);
                    } catch (\Throwable $e) {
                        return $contents->getTextArray();
                    }
                }
            } elseif ($contents instanceof ElementArray) {
                // Create a virtual global content, one part per line.
                $new_content = '';

                /** @var PDFObject $content */
                foreach ($contents->getContent() as $content) {
                    $new_content .= $content->getContent()."\n";
                }

                $header = new Header([], $this->document);
                $contents = new PDFObject($this->document, $header, $new_content, $this->config);
            }

            return $contents->getTextArray($this);
        }

        return [];
    }
271
272
    /**
     * Gets all the text commands of the page in their internal representation.
     *
     * When the content of "Contents" is an array, the content of every section
     * is concatenated and parsed with this page's getSectionsText() and
     * getCommandsText(). Otherwise the "Contents" object parses its own
     * content, and a synthetic ['t' => '', 'o' => 'BT', 'c' => ''] command is
     * inserted in front of the commands of each text section.
     *
     * @return array the list of commands found, in document order
     */
    public function extractRawData(): array
    {
        $content = $this->get('Contents');
        $values = $content->getContent();

        $isMultiPart = \is_array($values);
        if ($isMultiPart) {
            // Glue all the parts together and let this page do the parsing.
            $rawText = '';
            foreach ($values as $section) {
                $rawText .= $section->getContent();
            }
            $parser = $this;
        } else {
            // Single content object: it parses its own content.
            $rawText = $content->getContent();
            $parser = $content;
        }

        $commands = [];
        foreach ($parser->getSectionsText($rawText) as $sectionText) {
            if (!$isMultiPart) {
                // Only the single-object path marks the start of each section.
                $commands[] = ['t' => '', 'o' => 'BT', 'c' => ''];
            }

            foreach ($parser->getCommandsText($sectionText) as $command) {
                $commands[] = $command;
            }
        }

        return $commands;
    }
311
312
    /**
     * Gets all the text commands of the page with their text operands decoded.
     *
     * The commands are walked in order while tracking the current font:
     *  - Tf / TF selects the font named by the first space-separated token of the operand;
     *  - q remembers the current font and Q restores the remembered one;
     *  - Tj / TJ operands are decoded with the current font, in place.
     *
     * @param array|null $extractedRawData the data returned by extractRawData(); when
     *                                     null or empty, extractRawData() is called
     *
     * @return array the same commands, with the 'c' entry of Tj/TJ commands decoded
     */
    public function extractDecodedRawData(?array $extractedRawData = null): array
    {
        if (empty($extractedRawData)) {
            $extractedRawData = $this->extractRawData();
        }

        /** @var Font|null $currentFont */
        $currentFont = null;
        $clippedFont = null;

        foreach ($extractedRawData as &$command) {
            $operator = $command['o'];

            if ('Tj' == $operator || 'TJ' == $operator) {
                $data = $command['c'];

                if (!\is_array($data)) {
                    // NOTE(review): without a current font the operand is replaced
                    // by '' here, whereas the array branch below keeps the raw
                    // text. Kept as is; confirm which behaviour is intended.
                    $octalDecoded = isset($currentFont) ? $currentFont->decodeOctal($data) : '';
                    $command['c'] = $this->unescapeAndDecodeText($octalDecoded, $currentFont);
                    continue;
                }

                // Only the even positions of the operand array are decoded.
                $numText = \count($data);
                for ($i = 0; $i < $numText; $i += 2) {
                    $fragment = $data[$i]['c'];
                    $octalDecoded = isset($currentFont) ? $currentFont->decodeOctal($fragment) : $fragment;
                    $command['c'][$i]['c'] = $this->unescapeAndDecodeText($octalDecoded, $currentFont);
                }
            } elseif ('Tf' == $operator || 'TF' == $operator) {
                $fontId = explode(' ', $command['c'])[0];
                $currentFont = $this->getFont($fontId);
            } elseif ('Q' == $operator) {
                $currentFont = $clippedFont;
            } elseif ('q' == $operator) {
                $clippedFont = $currentFont;
            }
        }
        // Break the reference left behind by the by-reference foreach, so the
        // last element of the returned array is a plain value.
        unset($command);

        return $extractedRawData;
    }

    /**
     * Resolves the backslash escapes of a text operand, converts it with
     * utf8_encode() and, when a font is given, passes it to the font's
     * decodeContent().
     *
     * NOTE(review): utf8_encode() is deprecated as of PHP 8.2; it is kept here
     * so the output stays unchanged.
     *
     * @param string    $text the operand, after the optional octal decoding
     * @param Font|null $font the current font, if any
     *
     * @return mixed the value produced by utf8_encode() or, when a font is given,
     *               by Font::decodeContent()
     */
    private function unescapeAndDecodeText($text, ?Font $font)
    {
        $text = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
            ['\\', '(', ')', "\n", "\r", "\t", ' '],
            $text
        );
        $text = utf8_encode($text);

        if (null !== $font) {
            $text = $font->decodeContent($text);
        }

        return $text;
    }
380
381
    /**
     * Gets just the text commands that are involved in text positions and the
     * Text Matrix (Tm).
     *
     * Commands whose operator is one of the following are kept, in their
     * original order; every other command is dropped:
     *
     *  - BT  begin a text object
     *  - ET  end a text object
     *  - TL  set the text leading used by T*, ' and "
     *  - Td  move to the start of the next line, offset by (tx, ty)
     *  - TD  same as Td, and also set the leading
     *  - Tm  set the text matrix and the text line matrix
     *  - T*  move to the start of the next line
     *  - Tj  show a text string
     *  - '   move to the next line and show a text string
     *  - "   set word and character spacing, move to the next line and show a text string
     *  - TJ  show text strings with individual glyph positioning
     *
     * @param array $extractedDecodedRawData the data extracted by extractDecodedRawData();
     *                                       when null or empty, extractDecodedRawData() is called
     *
     * @return array an array with the text commands of the page
     */
    public function getDataCommands(array $extractedDecodedRawData = null): array
    {
        if (empty($extractedDecodedRawData)) {
            $extractedDecodedRawData = $this->extractDecodedRawData();
        }

        $textOperators = ['BT', 'ET', 'TL', 'Td', 'TD', 'Tm', 'T*', 'Tj', "'", '"', 'TJ'];

        $textCommands = [];
        foreach ($extractedDecodedRawData as $command) {
            // Loose comparison on purpose: it matches the way a switch statement compares.
            if (\in_array($command['o'], $textOperators)) {
                $textCommands[] = $command;
            }
        }

        return $textCommands;
    }
524
525
    /**
     * Gets the Text Matrix of the text in the page.
     *
     * Returns an array where every item is an array whose first item is the
     * Text Matrix (Tm) and whose second item is the text shown with it. The
     * Text Matrix is an array of 6 numbers: the last 2 are the X and Y
     * coordinates of the text, the first 4 describe its scaling, rotation and
     * skew. The result is also stored in $this->dataTm.
     *
     * The texts come from getTextArray() and are paired with the text-showing
     * commands by position: the n-th text-showing command gets the n-th text.
     *
     * @param array|null $dataCommands the data extracted by getDataCommands();
     *                                 when null or empty, getDataCommands() is called
     *
     * @return array an array with the data of the page including the Tm information
     *               of any text in the page
     */
    public function getDataTm(?array $dataCommands = null): array
    {
        if (empty($dataCommands)) {
            $dataCommands = $this->getDataCommands();
        }

        // At the beginning of a text object Tm is the identity matrix.
        $defaultTm = ['1', '0', '0', '1', '0', '0'];

        // Initial text leading, used by the T*, ' and " operators.
        $defaultTl = 0;

        // Positions of the X and Y coordinates inside the matrix (Tm).
        $x = 4;
        $y = 5;
        $Tx = 0;
        $Ty = 0;

        $Tm = $defaultTm;
        $Tl = $defaultTl;

        $extractedTexts = $this->getTextArray();
        $extractedData = [];
        foreach ($dataCommands as $command) {
            // Text that goes with the next text-showing command. The lookup runs
            // for every command, so it falls back to '' once all the texts have
            // been consumed instead of raising an undefined-offset warning.
            $currentText = $extractedTexts[\count($extractedData)] ?? '';

            switch ($command['o']) {
                /*
                 * BT: begin a text object, initializing Tm and Tlm to the identity matrix.
                 * ET: end a text object, discarding the text matrix.
                 * NOTE(review): the leading is reset here as well (the original
                 * code carried a "review this" remark on that line).
                 */
                case 'BT':
                case 'ET':
                    $Tm = $defaultTm;
                    $Tl = $defaultTl;
                    $Tx = 0;
                    $Ty = 0;
                    break;

                /*
                 * leading TL
                 * Set the text leading, Tl, to leading. Tl is used by the T*, ' and " operators.
                 * Initial value: 0
                 */
                case 'TL':
                    $Tl = (float) $command['c'];
                    break;

                /*
                 * tx ty Td
                 * Move to the start of the next line, offset from the start of the
                 * current line by tx, ty.
                 */
                case 'Td':
                    $coord = explode(' ', $command['c']);
                    $Tx += (float) $coord[0];
                    $Ty += (float) $coord[1];
                    $Tm[$x] = (string) $Tx;
                    $Tm[$y] = (string) $Ty;
                    break;

                /*
                 * tx ty TD
                 * Move to the start of the next line, offset from the start of the
                 * current line by tx, ty. As a side effect, this operator sets the
                 * leading parameter in the text state. It has the same effect as:
                 * -ty TL
                 * tx ty Td
                 * Hence the leading becomes -ty and the position moves by (tx, ty),
                 * exactly as for Td.
                 */
                case 'TD':
                    $coord = explode(' ', $command['c']);
                    $Tl = -((float) $coord[1]);
                    $Tx += (float) $coord[0];
                    $Ty += (float) $coord[1];
                    $Tm[$x] = (string) $Tx;
                    $Tm[$y] = (string) $Ty;
                    break;

                /*
                 * a b c d e f Tm
                 * Set the text matrix, Tm, and the text line matrix, Tlm. The operands are
                 * all numbers, and the initial value for Tm and Tlm is the identity matrix
                 * [1 0 0 1 0 0]
                 */
                case 'Tm':
                    $Tm = explode(' ', $command['c']);
                    $Tx = (float) $Tm[$x];
                    $Ty = (float) $Tm[$y];
                    break;

                /*
                 * T*
                 * Move to the start of the next line, i.e. lower Y by the current
                 * leading parameter Tl of the text state.
                 */
                case 'T*':
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                    break;

                /*
                 * string Tj
                 * Show a text string.
                 */
                case 'Tj':
                    $extractedData[] = [$Tm, $currentText];
                    break;

                /*
                 * string '
                 * Move to the next line and show a text string. This operator has the
                 * same effect as the code:
                 * T*
                 * string Tj
                 */
                case "'":
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                    $extractedData[] = [$Tm, $currentText];
                    break;

                /*
                 * aw ac string "
                 * Move to the next line and show a text string, using aw as the word
                 * spacing and ac as the character spacing. This operator has the same
                 * effect as the code:
                 * aw Tw
                 * ac Tc
                 * string '
                 * Tw sets the word spacing and Tc sets the character spacing.
                 *
                 * NOTE(review): the third space-separated token of the text is
                 * taken as the string to show; the original code marks this as
                 * still to be verified.
                 */
                case '"':
                    $data = explode(' ', $currentText);
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                    $extractedData[] = [$Tm, $data[2]];
                    break;

                /*
                 * array TJ
                 * Show one or more text strings, allowing individual glyph positioning.
                 * Each element of the array can be a string or a number. If the element
                 * is a string, this operator shows the string. If it is a number, the
                 * operator adjusts the text position by that amount; that is, it
                 * translates the text matrix, Tm. This amount is subtracted from the
                 * current horizontal or vertical coordinate, depending on the writing
                 * mode. In the default coordinate system, a positive adjustment has the
                 * effect of moving the next glyph painted either to the left or down by
                 * the given amount. The adjustments are not applied to Tm here.
                 */
                case 'TJ':
                    $extractedData[] = [$Tm, $currentText];
                    break;

                default:
            }
        }
        $this->dataTm = $extractedData;

        return $extractedData;
    }
718
719
    /**
720
     * Gets text data that are around the given coordinates (X,Y)
721
     *
722
     * If the text is in near the given coordinates (X,Y) (or the TM info),
723
     * the text is returned.  The extractedData return by getDataTm, could be use to see
724
     * where is the coordinates of a given text, using the TM info for it.
725
     *
726
     * @param float $x      The X value of the coordinate to search for. if null
727
     *                      just the Y value is considered (same Row)
728
     * @param float $y      The Y value of the coordinate to search for
729
     *                      just the X value is considered (same column)
730
     * @param float $xError The value less or more to consider an X to be "near"
731
     * @param float $yError The value less or more to consider an Y to be "near"
732
     *
733
     * @return array An array of text that are near the given coordinates. If no text
734 1
     *               "near" the x,y coordinate, an empty array is returned. If Both, x
735
     *               and y coordinates are null, null is returned.
736 1
     */
737 1
    public function getTextXY(float $x = null, float $y = null, float $xError = 0, float $yError = 0): array
738
    {
739
        if (!isset($this->dataTm) || !$this->dataTm) {
740 1
            $this->getDataTm();
741 1
        }
742
743
        if (null !== $x) {
744 1
            $x = (float) $x;
745 1
        }
746
747
        if (null !== $y) {
748 1
            $y = (float) $y;
749
        }
750
751
        if (null === $x && null === $y) {
752 1
            return [];
753 1
        }
754
755 1
        $xError = (float) $xError;
756 1
        $yError = (float) $yError;
757 1
758 1
        $extractedData = [];
759 1
        foreach ($this->dataTm as $item) {
760 1
            $tm = $item[0];
761 1
            $xTm = (float) $tm[4];
762
            $yTm = (float) $tm[5];
763
            $text = $item[1];
764
            if (null === $y) {
765
                if (($xTm >= ($x - $xError)) &&
766
                    ($xTm <= ($x + $xError))) {
767
                    $extractedData[] = [$tm, $text];
768 1
                    continue;
769
                }
770
            }
771
            if (null === $x) {
772
                if (($yTm >= ($y - $yError)) &&
773
                    ($yTm <= ($y + $yError))) {
774
                    $extractedData[] = [$tm, $text];
775 1
                    continue;
776 1
                }
777 1
            }
778 1
            if (($xTm >= ($x - $xError)) &&
779 1
                ($xTm <= ($x + $xError)) &&
780 1
                ($yTm >= ($y - $yError)) &&
781
                ($yTm <= ($y + $yError))) {
782
                $extractedData[] = [$tm, $text];
783
                continue;
784 1
            }
785
        }
786
787
        return $extractedData;
788
    }
789
}
790