Completed
Pull Request — master (#324)
by Jeremy
04:15 queued 02:11
created

PageTest::testGetTextXY()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 96
Code Lines 69

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
eloc 69
c 2
b 0
f 0
nc 1
nop 0
dl 0
loc 96
rs 8.6763

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @author  Sébastien MALOT <[email protected]>
10
 * @date    2017-01-03
11
 *
12
 * @license LGPLv3
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Tests\Smalot\PdfParser\Integration;
34
35
use Smalot\PdfParser\Document;
36
use Smalot\PdfParser\Element\ElementMissing;
37
use Smalot\PdfParser\Font;
38
use Smalot\PdfParser\Page;
39
use Tests\Smalot\PdfParser\TestCase;
40
41
class PageTest extends TestCase
42
{
43
    /**
     * Checks Page::getFonts() for a page that contains text and for a page
     * that does not. Each page is queried twice so the repeated call (described
     * by the inline comments as the cached path) is exercised as well.
     */
    public function testGetFonts()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        // Dedicated assertions report the actual count/type on failure instead of "false is not true".
        $this->assertGreaterThan(0, \count($fonts));
        foreach ($fonts as $font) {
            $this->assertInstanceOf(Font::class, $font);
        }
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));

        // ------------------------------------------------------
        // Document without text.
        $filename = $this->rootDir.'/samples/Document3_pdfcreator_nocompressed.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
    }
76
77
    /**
     * Page::getFonts() must return an empty array when the resources header
     * reports an entry but hands back an ElementMissing for it.
     */
    public function testGetFontsElementMissing()
    {
        $headerClass = 'Smalot\PdfParser\Header';

        // Inner header: claims to have the entry, then yields ElementMissing.
        $resources = $this->getMockBuilder($headerClass)->disableOriginalConstructor()->getMock();
        $resources->expects($this->once())->method('has')->willReturn(true);
        $resources->expects($this->once())->method('get')->willReturn(new ElementMissing());

        // Outer (page) header: hands out the inner header exactly once.
        $pageHeader = $this->getMockBuilder($headerClass)->disableOriginalConstructor()->getMock();
        $pageHeader->expects($this->once())->method('get')->willReturn($resources);

        $page = new Page(new Document(), $pageHeader);
        $result = $page->getFonts();

        $this->assertEmpty($result);
        $this->assertEquals([], $result);
    }
105
106
    /**
     * Checks that Page::getFont() returns a Font instance for both font ids
     * requested below.
     */
    public function testGetFont()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $font = $page->getFont('R7');
        // assertInstanceOf reports the actual type on failure, unlike assertTrue(... instanceof ...).
        $this->assertInstanceOf(Font::class, $font);

        // NOTE(review): 'ABC7' looks like an id that is not defined in the sample;
        // a Font is still expected here - confirm against Page::getFont().
        $font = $page->getFont('ABC7');
        $this->assertInstanceOf(Font::class, $font);
    }
122
123
    /**
     * Checks that Page::getText() returns more than 150 bytes of text for the
     * sample document and that it contains the expected fragments.
     */
    public function testGetText()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $text = $page->getText();

        // assertGreaterThan prints the actual length on failure instead of "false is not true".
        $this->assertGreaterThan(150, \strlen($text));
        $this->assertContains('Document title', $text);
        $this->assertContains('Lorem ipsum', $text);

        $this->assertContains('Calibri', $text);
        $this->assertContains('Arial', $text);
        $this->assertContains('Times', $text);
        $this->assertContains('Courier New', $text);
        $this->assertContains('Verdana', $text);
    }
143
144
    /**
     * Checks the number of entries returned by Page::extractRawData() and the
     * keys and content of the entry at index 1 (a 'Tm' operator entry).
     */
    public function testExtractRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedRawData = $page->extractRawData();
        $tmItem = $extractedRawData[1];

        // Was spelled "assertcount"; it only worked because PHP method names are case-insensitive.
        $this->assertCount(172, $extractedRawData);
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);
    }
165
166
    /**
     * Checks the entries returned by Page::extractDecodedRawData(): the total
     * count, the 'Tm' entry at index 1 and the 'TJ' entry at index 2 with its
     * first three sub-entries.
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();
        $tmItem = $extractedDecodedRawData[1];
        $this->assertCount(172, $extractedDecodedRawData);

        // Shape of the Tm entry. These checks used to be repeated verbatim a few
        // lines further down on the same, unchanged $tmItem; the copy was dropped.
        $this->assertCount(3, $tmItem);
        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

        // The TJ entry carries a list of sub-entries under 'c', each with 't' and 'c' keys.
        $tjItem = $extractedDecodedRawData[2];
        $this->assertContains('TJ', $tjItem['o']);
        $this->assertContains('(', $tjItem['c'][0]['t']);
        $this->assertContains('D', $tjItem['c'][0]['c']);
        $this->assertContains('n', $tjItem['c'][1]['t']);
        $this->assertContains('0.325008', $tjItem['c'][1]['c']);
        $this->assertContains('(', $tjItem['c'][2]['t']);
        $this->assertContains('o', $tjItem['c'][2]['c']);
    }
200
201
    /**
     * Parsing the corrupted sample and asking for its pages is expected to
     * raise an exception with the "Unable to find xref" message.
     */
    public function testExtractRawDataWithCorruptedPdf()
    {
        // Expectations have to be registered before the failing call is made.
        $this->expectException(\Exception::class);
        $this->expectExceptionMessage('Unable to find xref (PDF corrupted?)');

        $parser = $this->getParserInstance();
        $document = $parser->parseFile($this->rootDir.'/samples/corrupted.pdf');
        $document->getPages();
    }
211
212
    /**
     * Checks the list returned by Page::getDataCommands(): the total count,
     * the 'Tm' entry at index 0 and the 'TJ' entry at index 1 with its first
     * three sub-entries.
     */
    public function testGetDataCommands()
    {
        // Document with text.
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf');
        $pages = $document->getPages();
        $commands = $pages[0]->getDataCommands();
        $this->assertCount(166, $commands);

        $entryKeys = ['t', 'o', 'c'];

        // First command: Tm.
        $tm = $commands[0];
        $this->assertCount(3, $tm);
        foreach ($entryKeys as $key) {
            $this->assertArrayHasKey($key, $tm);
        }
        $this->assertContains('Tm', $tm['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tm['c']);

        // Second command: TJ.
        $tj = $commands[1];
        $this->assertCount(3, $tj);
        foreach ($entryKeys as $key) {
            $this->assertArrayHasKey($key, $tj);
        }
        $this->assertContains('TJ', $tj['o']);

        // position => [fragment expected in 't', fragment expected in 'c']
        $expectedChunks = [
            0 => ['(', 'D'],
            1 => ['n', '0.325008'],
            2 => ['(', 'o'],
        ];
        foreach ($expectedChunks as $position => $expected) {
            $chunk = $tj['c'][$position];
            $this->assertContains($expected[0], $chunk['t']);
            $this->assertContains($expected[1], $chunk['c']);
        }
    }
246
247
    /**
     * Checks the [six values, text] pairs returned by Page::getDataTm() for a
     * plain text document and for two filled-in copies of the same invoice form.
     */
    public function testGetDataTm()
    {
        $parser = $this->getParserInstance();

        // Document with text.
        $textDocument = $parser->parseFile($this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf');
        $textPages = $textDocument->getPages();
        $dataTm = $textPages[0]->getDataTm();
        $this->assertCount(81, $dataTm);

        // Only the first entry has its shape checked explicitly.
        $this->assertCount(2, $dataTm[0]);
        $this->assertCount(6, $dataTm[0][0]);

        // index => [expected six values, expected text fragment]
        $expectedEntries = [
            0 => [
                ['0.999429', '0', '0', '1', '201.96', '720.68'],
                'Document title',
            ],
            2 => [
                ['0.999402', '0', '0', '1', '70.8', '673.64'],
                'Calibri : Lorem ipsum dolor sit amet, consectetur a',
            ],
            80 => [
                ['0.999402', '0', '0', '1', '343.003', '81.44'],
                'nenatis.',
            ],
        ];
        foreach ($expectedEntries as $index => $expected) {
            $entry = $dataTm[$index];
            $this->assertEquals($expected[0], $entry[0]);
            $this->assertContains($expected[1], $entry[1]);
        }

        // ------------------------------------------------------
        // Two filled-in copies of the same form: the checked entries sit at the
        // same indexes with the same six values, only the texts differ.
        $formValues = [
            2 => ['1', '0', '0', '1', '167.3', '894.58'],
            6 => ['1', '0', '0', '1', '681.94', '877.42'],
            8 => ['1', '0', '0', '1', '174.86', '827.14'],
        ];
        $formTexts = [
            '/samples/SimpleInvoiceFilledExample1.pdf' => [
                2 => 'MyName  MyLastName',
                6 => '1/1/2020',
                8 => 'Purchase 1',
            ],
            '/samples/SimpleInvoiceFilledExample2.pdf' => [
                2 => "Other'sName  Other'sLastName",
                6 => '2/2/2020',
                8 => 'Purchase 2',
            ],
        ];
        foreach ($formTexts as $sample => $textsByIndex) {
            $formDocument = $parser->parseFile($this->rootDir.$sample);
            $formPages = $formDocument->getPages();
            $dataTm = $formPages[0]->getDataTm();

            $this->assertCount(105, $dataTm);
            $this->assertCount(2, $dataTm[2]);
            $this->assertCount(6, $dataTm[2][0]);

            foreach ($textsByIndex as $index => $text) {
                $this->assertEquals($formValues[$index], $dataTm[$index][0]);
                $this->assertContains($text, $dataTm[$index][1]);
            }
        }
    }
409
410
    /**
     * Checks Page::getTextXY() lookups on a plain text document and on two
     * filled-in copies of the same invoice form.
     *
     * The repeated load-page and compare-match steps live in private helpers
     * so that each scenario below reads as a short list of expectations.
     */
    public function testGetTextXY()
    {
        $parser = $this->getParserInstance();

        // Document with text.
        $page = $this->firstPageOfSample($parser, 'Document1_pdfcreator_nocompressed.pdf');
        $titleValues = ['0.999429', '0', '0', '1', '201.96', '720.68'];

        // Exact coordinates find the title.
        $this->assertSingleTextXYMatch($page->getTextXY(201.96, 720.68), $titleValues, 'Document title');

        // Truncated coordinates find nothing...
        $this->assertCount(0, $page->getTextXY(201, 720));

        // ...unless the two extra arguments are passed
        // (presumably x/y tolerances - confirm against Page::getTextXY()).
        $this->assertSingleTextXYMatch($page->getTextXY(201, 720, 1, 1), $titleValues, 'Document title');

        // ------------------------------------------------------
        // Document is a form
        $page = $this->firstPageOfSample($parser, 'SimpleInvoiceFilledExample1.pdf');
        $nameValues = ['1', '0', '0', '1', '167.3', '894.58'];

        $this->assertSingleTextXYMatch($page->getTextXY(167, 894, 1, 1), $nameValues, 'MyName  MyLastName');

        $result = $page->getTextXY(681, 877, 1, 1);
        $this->assertContains('1/1/2020', $result[0][1]);

        $result = $page->getTextXY(174, 827, 1, 1);
        $this->assertContains('Purchase 1', $result[0][1]);

        // ------------------------------------------------------
        // Document is another form of the same type
        $page = $this->firstPageOfSample($parser, 'SimpleInvoiceFilledExample2.pdf');

        // No count checks for this copy, only the content of the first match.
        $this->assertFirstTextXYMatch($page->getTextXY(167, 894, 1, 1), $nameValues, "Other'sName  Other'sLastName");

        $result = $page->getTextXY(681, 877, 1, 1);
        $this->assertContains('2/2/2020', $result[0][1]);

        $result = $page->getTextXY(174, 827, 1, 1);
        $this->assertContains('Purchase 2', $result[0][1]);
    }

    /**
     * Parses a file from the samples directory and returns its first page.
     *
     * @param object $parser     parser returned by getParserInstance()
     * @param string $sampleName file name inside "<rootDir>/samples/"
     *
     * @return Page
     */
    private function firstPageOfSample($parser, $sampleName)
    {
        $pages = $parser->parseFile($this->rootDir.'/samples/'.$sampleName)->getPages();

        return $pages[0];
    }

    /**
     * Asserts that a getTextXY() result has exactly one match, that the match
     * is a two-element entry, and that it carries the expected values and text.
     *
     * @param array  $result         value returned by Page::getTextXY()
     * @param array  $expectedValues expected content of the match's first element
     * @param string $expectedText   fragment expected in the match's second element
     */
    private function assertSingleTextXYMatch($result, array $expectedValues, $expectedText)
    {
        $this->assertCount(1, $result);
        $this->assertCount(2, $result[0]);
        $this->assertFirstTextXYMatch($result, $expectedValues, $expectedText);
    }

    /**
     * Asserts the values and text of the first match in a getTextXY() result.
     *
     * @param array  $result         value returned by Page::getTextXY()
     * @param array  $expectedValues expected content of the match's first element
     * @param string $expectedText   fragment expected in the match's second element
     */
    private function assertFirstTextXYMatch($result, array $expectedValues, $expectedText)
    {
        $this->assertEquals($expectedValues, $result[0][0]);
        $this->assertContains($expectedText, $result[0][1]);
    }
507
}
508