Completed
Pull Request — master (#295)
by
unknown
02:11
created

Page::getTextArray()   B

Complexity

Conditions 10
Paths 7

Size

Total Lines 42
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 1
Metric Value
cc 10
eloc 24
c 2
b 0
f 1
nc 7
nop 1
dl 0
loc 42
rs 7.6666

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\Element\ElementArray;
34
use Smalot\PdfParser\Element\ElementMissing;
35
use Smalot\PdfParser\Element\ElementNull;
36
use Smalot\PdfParser\Element\ElementXRef;
37
38
/**
39
 * Class Page
40
 */
41
class Page extends PDFObject
42
{
43
    /**
44
     * @var Font[]
45
     */
46
    protected $fonts = null;
47
48
    /**
49
     * @var PDFObject[]
50
     */
51
    protected $xobjects = null;
52
    
53
    /**
54
     * @var array[]|null Cached result of getDataTm(): a list of [text matrix, text] pairs
55
     */
56
    protected $dataTm = null;
57
58
    /**
     * Returns the fonts declared in this page's Resources dictionary.
     *
     * Every font is registered under its original id and, when the id contains
     * any of the characters 0-9 . - _, also under the id reduced to just those
     * characters. The table is cached on the instance once it has been built;
     * the empty result for a page without a Font entry is not cached.
     *
     * @return Font[]
     */
    public function getFonts()
    {
        if (null !== $this->fonts) {
            return $this->fonts;
        }

        $resources = $this->get('Resources');

        // Resources may be an element that cannot be queried with has().
        if (!method_exists($resources, 'has') || !$resources->has('Font')) {
            return [];
        }

        // The Font entry is either a Header itself or an object carrying one.
        $fontEntry = $resources->get('Font');
        $candidates = $fontEntry instanceof Header
            ? $fontEntry->getElements()
            : $fontEntry->getHeader()->getElements();

        $table = [];

        foreach ($candidates as $id => $candidate) {
            if (!($candidate instanceof Font)) {
                continue;
            }

            $table[$id] = $candidate;

            // Also index the font by its id stripped of everything except digits, '.', '-' and '_'.
            $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);
            if ('' != $cleanedId) {
                $table[$cleanedId] = $candidate;
            }
        }

        $this->fonts = $table;

        return $table;
    }
95
96
    /**
     * Looks up a single font of this page by id.
     *
     * The id is tried verbatim first, then reduced to the characters
     * 0-9 . - _ (the same reduction getFonts() applies when indexing).
     *
     * @param string $id
     *
     * @return Font|null the matching font, or null when neither form of the id is known
     */
    public function getFont($id)
    {
        $fonts = $this->getFonts();

        if (isset($fonts[$id])) {
            return $fonts[$id];
        }

        $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);

        return isset($fonts[$cleanedId]) ? $fonts[$cleanedId] : null;
    }
117
118
    /**
     * Returns the XObjects declared in this page's Resources dictionary.
     *
     * Every XObject is registered under its original id and, when the id
     * contains any of the characters 0-9 . - _, also under the id reduced to
     * just those characters. The table is cached on the instance once built;
     * the empty result for a page without an XObject entry is not cached.
     *
     * @return PDFObject[]
     */
    public function getXObjects()
    {
        if (null !== $this->xobjects) {
            return $this->xobjects;
        }

        $resources = $this->get('Resources');

        // Resources may be an element that cannot be queried with has().
        if (!method_exists($resources, 'has') || !$resources->has('XObject')) {
            return [];
        }

        // The XObject entry is either a Header itself or an object carrying one.
        $xobjectEntry = $resources->get('XObject');
        $entries = $xobjectEntry instanceof Header
            ? $xobjectEntry->getElements()
            : $xobjectEntry->getHeader()->getElements();

        $table = [];

        foreach ($entries as $id => $entry) {
            $table[$id] = $entry;

            // Also index the object by its id stripped of everything except digits, '.', '-' and '_'.
            $cleanedId = preg_replace('/[^0-9\.\-_]/', '', $id);
            if ('' != $cleanedId) {
                $table[$cleanedId] = $entry;
            }
        }

        $this->xobjects = $table;

        return $table;
    }
155
156
    /**
     * Looks up a single XObject of this page by id.
     *
     * Only the id exactly as given is tried; unlike getFont() there is no
     * fallback to a cleaned-up id.
     *
     * @param string $id
     *
     * @return PDFObject|null the matching object, or null when the id is unknown
     */
    public function getXObject($id)
    {
        $xobjects = $this->getXObjects();

        return isset($xobjects[$id]) ? $xobjects[$id] : null;
    }
178
179
    /**
     * Returns the text of the page.
     *
     * When the Contents entry is split into several parts (a PDFObject whose
     * header elements are numerically keyed, or an ElementArray), the parts are
     * concatenated into one virtual PDFObject before the text is extracted.
     *
     * @param Page $page not used by this method; the page itself is always passed on
     *
     * @return string the page text, or '' when the page has no usable Contents
     */
    public function getText(self $page = null)
    {
        $contents = $this->get('Contents');

        if (!$contents || $contents instanceof ElementMissing || $contents instanceof ElementNull) {
            return '';
        }

        if ($contents instanceof PDFObject) {
            $elements = $contents->getHeader()->getElements();

            // Numeric keys mean the header is a list of content parts rather than a dictionary.
            if (is_numeric(key($elements))) {
                $merged = '';

                foreach ($elements as $element) {
                    // References are resolved to their target object before reading the content.
                    $part = $element instanceof ElementXRef ? $element->getObject() : $element;
                    $merged .= $part->getContent();
                }

                $contents = new PDFObject($this->document, new Header([], $this->document), $merged);
            }
        } elseif ($contents instanceof ElementArray) {
            // Build one virtual content stream, one part per line.
            $merged = '';

            foreach ($contents->getContent() as $part) {
                $merged .= $part->getContent()."\n";
            }

            $contents = new PDFObject($this->document, new Header([], $this->document), $merged);
        }

        return $contents->getText($this);
    }
225
226
227
    /**
     * Returns the text of the page as an array of text fragments.
     *
     * When the Contents entry is split into several parts (a PDFObject whose
     * header elements are numerically keyed, or an ElementArray), the parts are
     * concatenated into one virtual PDFObject before the text is extracted.
     *
     * @param Page $page not used by this method; the page itself is always passed on
     *
     * @return array the text fragments, or an empty array when the page has no usable Contents
     */
    public function getTextArray(self $page = null)
    {
        if ($contents = $this->get('Contents')) {
            if ($contents instanceof ElementMissing) {
                return [];
            } elseif ($contents instanceof ElementNull) {
                return [];
            } elseif ($contents instanceof PDFObject) {
                $elements = $contents->getHeader()->getElements();

                // Numeric keys mean the header is a list of content parts rather than a dictionary.
                if (is_numeric(key($elements))) {
                    $new_content = '';

                    /** @var PDFObject $element */
                    foreach ($elements as $element) {
                        if ($element instanceof ElementXRef) {
                            // References are resolved to their target object first.
                            $new_content .= $element->getObject()->getContent();
                        } else {
                            $new_content .= $element->getContent();
                        }
                    }

                    $header = new Header([], $this->document);
                    $contents = new PDFObject($this->document, $header, $new_content);
                }
            } elseif ($contents instanceof ElementArray) {
                // Create a virtual global content, one part per line.
                $new_content = '';

                /** @var PDFObject $content */
                foreach ($contents->getContent() as $content) {
                    $new_content .= $content->getContent()."\n";
                }

                $header = new Header([], $this->document);
                $contents = new PDFObject($this->document, $header, $new_content);
            }

            return $contents->getTextArray($this);
        }

        return [];
    }
324
    
325
	/**
     * Gets all the text data of the page with its internal representation.
     *
     * The page content is split into sections with getSectionsText() and each
     * section into commands with getCommandsText(); the commands of all
     * sections are returned as one flat list, in document order.
     *
     * @return array the extracted commands; an empty array when the page has no Contents
     */
    public function extractRawData()
    {
        $content = $this->get('Contents');

        // Same "no content" cases as getText(): nothing to extract from a missing or null entry.
        if (!$content || $content instanceof ElementMissing || $content instanceof ElementNull) {
            return [];
        }

        // NOTE(review): this relies on `value` being readable from this class; getText()
        // reads multi-part contents through getContent() instead - confirm which one applies.
        if (isset($content->value)) {
            // Multi-part contents: join the parts and parse them with this page's own helpers.
            $text = '';
            foreach ($content->value as $section) {
                $text .= $section->getContent();
            }
            $parser = $this;
        } else {
            // Single content object: let that object parse its own stream.
            $text = $content->getContent();
            $parser = $content;
        }

        $extractedData = [];
        foreach ($parser->getSectionsText($text) as $sectionText) {
            foreach ($parser->getCommandsText($sectionText) as $command) {
                $extractedData[] = $command;
            }
        }

        return $extractedData;
    }
364
    
365
    /**
     * Gets all the decoded text data with its internal representation from a page.
     *
     * Walks the commands in order, tracking the font selected by the last
     * Tf/TF command, and replaces the operand of every Tj/TJ command with its
     * decoded text. All other commands are returned untouched.
     *
     * @param array $extractedRawData the data returned by extractRawData(), or
     *                                null/empty to have extractRawData() called
     *
     * @return array the commands, with the text operands of Tj/TJ decoded
     */
    public function extractDecodedRawData($extractedRawData = null)
    {
        if (!isset($extractedRawData) or !$extractedRawData) {
            $extractedRawData = $this->extractRawData();
        }

        $unicode = true;
        $currentFont = null;

        foreach ($extractedRawData as &$command) {
            $operator = $command['o'];

            if ('Tf' == $operator or 'TF' == $operator) {
                // The font id is the first space-separated token of the operand.
                $fontId = explode(' ', $command['c'])[0];
                $currentFont = $this->getFont($fontId);
                continue;
            }

            if ('Tj' != $operator and 'TJ' != $operator) {
                continue;
            }

            if (!is_array($command['c'])) {
                $command['c'] = $this->decodeRawTextOperand($command['c'], $currentFont, $unicode);
                continue;
            }

            // Only the even positions of an array operand are decoded.
            // NOTE(review): getDataTm() tells numbers from strings through the 't' tag
            // instead of the position - confirm the entries really alternate.
            $numText = count($command['c']);
            for ($i = 0; $i < $numText; $i += 2) {
                $command['c'][$i]['c'] = $this->decodeRawTextOperand(
                    $command['c'][$i]['c'],
                    $currentFont,
                    $unicode
                );
            }
        }
        // Break the reference so later writes to $command cannot alter the last entry.
        unset($command);

        return $extractedRawData;
    }

    /**
     * Decodes one raw text operand: octal escapes (font required), backslash
     * escapes, UTF-8 conversion, then the font's own content decoding.
     *
     * @param string    $text    the raw operand
     * @param Font|null $font    the currently selected font; when null only the
     *                           font-independent steps are applied
     * @param bool      $unicode passed through to Font::decodeContent()
     *
     * @return string the decoded text
     */
    private function decodeRawTextOperand($text, $font, $unicode)
    {
        if (null !== $font) {
            $text = $font->decodeOctal($text);
        }

        $text = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
            ['\\', '(', ')', "\n", "\r", "\t", ' '],
            $text
        );

        // NOTE(review): utf8_encode() is deprecated as of PHP 8.2; kept to preserve the output.
        $text = utf8_encode($text);

        if (null !== $font) {
            $text = $font->decodeContent($text, $unicode);
        }

        return $text;
    }
432
    
433
    /**
     * Gets just the commands that are involved in text positioning and the
     * Text Matrix (Tm).
     *
     * The commands kept, in their original order, are:
     *  - BT  begin a text object
     *  - ET  end a text object
     *  - TL  set the text leading used by T*, ' and "
     *  - Td  move to the start of the next line, offset by (tx, ty)
     *  - TD  same as Td, and also sets the leading (-ty TL, then tx ty Td)
     *  - Tm  set the text matrix and the text line matrix
     *  - T*  move to the start of the next line
     *  - Tj  show a text string
     *  - '   move to the next line and show a text string
     *  - "   set word and character spacing, move to the next line and show a text string
     *  - TJ  show one or more text strings with individual glyph positioning
     *
     * @param array $extractedDecodedRawData the data extracted by extractDecodedRawData();
     *                                       if null or empty, extractDecodedRawData() is called
     *
     * @return array the text commands of the page
     */
    public function getDataCommands($extractedDecodedRawData = null)
    {
        if (!isset($extractedDecodedRawData) or !$extractedDecodedRawData) {
            $extractedDecodedRawData = $this->extractDecodedRawData();
        }

        $textOperators = ['BT', 'ET', 'TL', 'Td', 'TD', 'Tm', 'T*', 'Tj', "'", '"', 'TJ'];

        $extractedData = [];

        foreach ($extractedDecodedRawData as $command) {
            // Non-strict lookup on purpose: operators are matched with == semantics.
            if (in_array($command['o'], $textOperators)) {
                $extractedData[] = $command;
            }
        }

        return $extractedData;
    }
577
    
578
    /**
     * Gets the Text Matrix of every text shown in the page.
     *
     * Returns an array where every item is a pair: the first entry is the Text
     * Matrix (Tm) in effect when the text is shown, the second is the text as a
     * string. The Text Matrix is an array of 6 numbers (kept as strings); the
     * last two are the X and Y coordinates of the text, the first four describe
     * its scaling, rotation and skew. The result is also cached on the instance
     * for getTextXY().
     *
     * @param array $dataCommands the data extracted by getDataCommands();
     *                            if null or empty, getDataCommands() is called
     *
     * @return array the texts of the page, each with its Tm information
     */
    public function getDataTm($dataCommands = null)
    {
        if (!isset($dataCommands) or !$dataCommands) {
            $dataCommands = $this->getDataCommands();
        }

        // At the beginning of a text object Tm is the identity matrix.
        $defaultTm = ['1', '0', '0', '1', '0', '0'];

        // Initial text leading, used by the T*, ' and " operators.
        $defaultTl = 0;

        // Positions of the X and Y coordinates inside the matrix.
        $x = 4;
        $y = 5;
        $Tx = 0;
        $Ty = 0;

        $Tm = $defaultTm;
        $Tl = $defaultTl;

        $extractedData = [];

        foreach ($dataCommands as $command) {
            switch ($command['o']) {
                // BT / ET: begin / end a text object; the text matrix starts over
                // from the identity matrix.
                // NOTE(review): the leading is reset here as well - confirm whether it
                // should instead survive across text objects.
                case 'BT':
                case 'ET':
                    $Tm = $defaultTm;
                    $Tl = $defaultTl;
                    $Tx = 0;
                    $Ty = 0;
                    break;

                // leading TL: set the text leading.
                case 'TL':
                    $Tl = (float) $command['c'];
                    break;

                // tx ty Td: move to the start of the next line, offset by (tx, ty).
                case 'Td':
                    $coord = explode(' ', $command['c']);
                    $Tx += (float) $coord[0];
                    $Ty += (float) $coord[1];
                    $Tm[$x] = (string) $Tx;
                    $Tm[$y] = (string) $Ty;
                    break;

                // tx ty TD: same effect as "-ty TL" followed by "tx ty Td".
                case 'TD':
                    $coord = explode(' ', $command['c']);
                    $Tl = -((float) $coord[1]);
                    $Tx += (float) $coord[0];
                    $Ty += (float) $coord[1];
                    $Tm[$x] = (string) $Tx;
                    $Tm[$y] = (string) $Ty;
                    break;

                // a b c d e f Tm: set the text matrix explicitly.
                case 'Tm':
                    $Tm = explode(' ', $command['c']);
                    $Tx = (float) $Tm[$x];
                    $Ty = (float) $Tm[$y];
                    break;

                // T*: move to the start of the next line using the current leading.
                case 'T*':
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                    break;

                // string Tj: show a text string.
                case 'Tj':
                    $extractedData[] = [$Tm, $command['c']];
                    break;

                // string ': move to the next line (T*), then show the string (Tj).
                case "'":
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                    $extractedData[] = [$Tm, $command['c']];
                    break;

                // aw ac string ": set word/character spacing, then behave like '.
                case '"':
                    // Split off only aw and ac so spaces inside the string are preserved.
                    $data = explode(' ', $command['c'], 3);
                    $Ty -= $Tl;
                    $Tm[$y] = (string) $Ty;
                    $extractedData[] = [$Tm, isset($data[2]) ? $data[2] : ''];
                    break;

                // array TJ: show several strings; entries tagged 'n' are numeric
                // position adjustments and carry no text.
                case 'TJ':
                    $text = [];
                    foreach ($command['c'] as $part) {
                        if ('n' == $part['t']) {
                            continue;
                        }
                        $text[] = $part['c'];
                    }
                    $extractedData[] = [$Tm, implode('', $text)];
                    break;

                default:
            }
        }

        $this->dataTm = $extractedData;

        return $extractedData;
    }
780
    
781
    /**
     * Gets the text data found around the given coordinates (X, Y).
     *
     * A text is returned when the coordinates stored in its Text Matrix lie
     * within the given tolerance of the requested ones. The data returned by
     * getDataTm() can be used to find out the coordinates of a given text.
     *
     * @param float $x      the X value of the coordinate to search for; if null,
     *                      only the Y value is considered (same row)
     * @param float $y      the Y value of the coordinate to search for; if null,
     *                      only the X value is considered (same column)
     * @param float $xError how far, below or above, an X may be and still count as "near"
     * @param float $yError how far, below or above, a Y may be and still count as "near"
     *
     * @return array|null the [text matrix, text] pairs near the given coordinates;
     *                    an empty array when nothing is near; null when both $x
     *                    and $y are null
     */
    public function getTextXY($x, $y, $xError = 0, $yError = 0)
    {
        // Build the cached matrix data on first use.
        if (!isset($this->dataTm) or !$this->dataTm) {
            $this->getDataTm();
        }

        if (!isset($x) and !isset($y)) {
            return null;
        }

        $x = isset($x) ? (float) $x : null;
        $y = isset($y) ? (float) $y : null;
        $xTolerance = isset($xError) ? (float) $xError : 0;
        $yTolerance = isset($yError) ? (float) $yError : 0;

        $matches = [];

        foreach ($this->dataTm as $item) {
            list($tm, $text) = $item;
            $xTm = (float) $tm[4];
            $yTm = (float) $tm[5];

            // An axis that was not requested never excludes an item.
            $xInRange = null === $x
                || ($xTm >= ($x - $xTolerance) && $xTm <= ($x + $xTolerance));
            $yInRange = null === $y
                || ($yTm >= ($y - $yTolerance) && $yTm <= ($y + $yTolerance));

            if ($xInRange && $yInRange) {
                $matches[] = [$tm, $text];
            }
        }

        return $matches;
    }
853
}
854