Test Failed
Pull Request — master (#457)
by
unknown
02:16
created

PageTest   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 601
Duplicated Lines 0 %

Importance

Changes 10
Bugs 3 Features 0
Metric Value
eloc 400
c 10
b 3
f 0
dl 0
loc 601
rs 10
wmc 17

15 Methods

Rating   Name   Duplication   Size   Complexity  
A testGetFont() 0 15 1
A testGetText() 0 19 1
A testGetFontsElementMissing() 0 27 1
A testGetFonts() 0 32 2
A testGetDataTmIssue336() 0 24 1
A testGetDataCommands() 0 33 1
A testGetTextPullRequest457() 0 24 1
A testExtractDecodedRawDataIssue450() 0 16 1
A testGetPages() 0 15 2
A testExtractDecodedRawData() 0 33 1
A testGetDataTmIssue450() 0 21 1
A testExtractRawData() 0 29 1
B testGetTextXY() 0 96 1
A testExtractRawDataWithCorruptedPdf() 0 9 1
B testGetDataTm() 0 161 1
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @author  Sébastien MALOT <[email protected]>
10
 * @date    2017-01-03
11
 *
12
 * @license LGPLv3
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Tests\Smalot\PdfParser\Integration;
34
35
use Smalot\PdfParser\Document;
36
use Smalot\PdfParser\Element\ElementMissing;
37
use Smalot\PdfParser\Font;
38
use Smalot\PdfParser\Page;
39
use Tests\Smalot\PdfParser\TestCase;
40
41
class PageTest extends TestCase
42
{
43
    /**
     * Checks Page::getFonts() on a sample containing text and on a sample without text.
     *
     * Every page is queried twice: per the original inline notes, the first call
     * loads the font data and the second one is meant to hit the cached result.
     */
    public function testGetFonts()
    {
        $parser = $this->getParserInstance();

        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));
        // Dedicated assertion: on failure it reports the offending element's type.
        $this->assertContainsOnlyInstancesOf(Font::class, $fonts);

        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));

        // ------------------------------------------------------
        // Document without text.
        $filename = $this->rootDir.'/samples/Document3_pdfcreator_nocompressed.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);

        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
    }
76
77
    /**
     * Page::getFonts() has to return an empty array when the nested header
     * claims to have the requested entry but hands back an ElementMissing.
     */
    public function testGetFontsElementMissing()
    {
        $headerClass = 'Smalot\PdfParser\Header';

        // Both collaborators are constructor-less mocks of the same class.
        $resources = $this->getMockBuilder($headerClass)
            ->disableOriginalConstructor()
            ->getMock();
        $pageHeader = $this->getMockBuilder($headerClass)
            ->disableOriginalConstructor()
            ->getMock();

        // Nested header: has() answers true once, get() yields ElementMissing once.
        $resources->expects($this->once())
            ->method('has')
            ->willReturn(true);
        $resources->expects($this->once())
            ->method('get')
            ->willReturn(new ElementMissing());

        // Outer header: its single get() call exposes the nested header.
        $pageHeader->expects($this->once())
            ->method('get')
            ->willReturn($resources);

        $fonts = (new Page(new Document(), $pageHeader))->getFonts();

        $this->assertEmpty($fonts);
        $this->assertEquals([], $fonts);
    }
105
106
    /**
     * Page::getFont() must return a Font instance for both ids requested below.
     */
    public function testGetFont()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $font = $page->getFont('R7');
        $this->assertInstanceOf(Font::class, $font);

        // NOTE(review): 'ABC7' is presumably an id that is absent from the sample;
        // a Font instance is still expected back - confirm against Page::getFont().
        $font = $page->getFont('ABC7');
        $this->assertInstanceOf(Font::class, $font);
    }
122
123
    /**
     * Page::getText() on the first page of the sample must return more than
     * 150 bytes of text containing the title, the filler text and the font names.
     */
    public function testGetText()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $text = $page->getText();

        // assertGreaterThan reports the actual length on failure, unlike assertTrue.
        $this->assertGreaterThan(150, \strlen($text));
        $this->assertStringContainsString('Document title', $text);
        $this->assertStringContainsString('Lorem ipsum', $text);

        $this->assertStringContainsString('Calibri', $text);
        $this->assertStringContainsString('Arial', $text);
        $this->assertStringContainsString('Times', $text);
        $this->assertStringContainsString('Courier New', $text);
        $this->assertStringContainsString('Verdana', $text);
    }
143
144
    /**
     * Page::getText() on the sample attached to pull request 457 must return
     * more than 1000 bytes of text containing the expected words.
     *
     * @see https://github.com/smalot/pdfparser/pull/457
     */
    public function testGetTextPullRequest457()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/bugs/PullRequest457.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $text = $page->getText();

        // assertGreaterThan reports the actual length on failure, unlike assertTrue.
        $this->assertGreaterThan(1000, \strlen($text));
        $this->assertStringContainsString('SUPER', $text);
        $this->assertStringContainsString('VOORDEEL', $text);
        $this->assertStringContainsString('KRANT', $text);
        $this->assertStringContainsString('DINSDAG', $text);
        $this->assertStringContainsString('Snelfilterkoffie', $text);
        $this->assertStringContainsString('AardappelenZak', $text);
        $this->assertStringContainsString('ALL', $text);

        // Explicit release kept from the original (presumably to free the parsed
        // document early - TODO confirm); same variables, same order, one call.
        unset($parser, $document, $pages, $page, $text);
    }
172
173
    /**
     * Page::extractRawData() must return 174 entries for the sample page; the
     * entries at index 0 (BT) and index 2 (Tm) are inspected in detail.
     *
     * Each inspected entry is expected to carry exactly the keys 't', 'o' and 'c'.
     */
    public function testExtractRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedRawData = $page->extractRawData();

        // Check the overall size first, so a short result fails with a clear
        // assertion message instead of an undefined-offset error below.
        $this->assertCount(174, $extractedRawData);

        $btItem = $extractedRawData[0];
        $this->assertCount(3, $btItem);
        $this->assertArrayHasKey('t', $btItem);
        $this->assertArrayHasKey('o', $btItem);
        $this->assertArrayHasKey('c', $btItem);

        $this->assertEquals('BT', $btItem['o']);

        $tmItem = $extractedRawData[2];
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertStringContainsString('Tm', $tmItem['o']);
        $this->assertStringContainsString('0.999429 0 0 1 201.96 720.68', $tmItem['c']);
    }
203
204
    /**
     * Page::extractDecodedRawData() must return 174 entries for the sample page;
     * the Tm entry at index 2 and the TJ entry at index 3 are inspected in detail.
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();

        // Check the overall size first, so a short result fails with a clear
        // assertion message instead of an undefined-offset error below.
        $this->assertCount(174, $extractedDecodedRawData);

        // The Tm entry is checked once; the original repeated this group verbatim.
        $tmItem = $extractedDecodedRawData[2];
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertStringContainsString('Tm', $tmItem['o']);
        $this->assertStringContainsString('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

        // The TJ entry's 'c' value is a list of sub-entries with their own 't'/'c' keys.
        $tjItem = $extractedDecodedRawData[3];
        $this->assertStringContainsString('TJ', $tjItem['o']);
        $this->assertStringContainsString('(', $tjItem['c'][0]['t']);
        $this->assertStringContainsString('D', $tjItem['c'][0]['c']);
        $this->assertStringContainsString('n', $tjItem['c'][1]['t']);
        $this->assertStringContainsString('0.325008', $tjItem['c'][1]['c']);
        $this->assertStringContainsString('(', $tjItem['c'][2]['t']);
        $this->assertStringContainsString('o', $tjItem['c'][2]['c']);
    }
238
239
    /**
     * Working with the corrupted sample must raise an \Exception whose message
     * points at the missing xref.
     */
    public function testExtractRawDataWithCorruptedPdf()
    {
        // Expectations have to be registered before the failing call is made.
        $this->expectException(\Exception::class);
        $this->expectExceptionMessage('Unable to find xref (PDF corrupted?)');

        $parser = $this->getParserInstance();
        $document = $parser->parseFile($this->rootDir.'/samples/corrupted.pdf');
        $document->getPages();
    }
249
250
    /**
     * Page::getDataCommands() must return 168 commands for the sample page; the
     * Tm command at index 1 and the TJ command at index 2 are inspected in detail.
     */
    public function testGetDataCommands()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $pages = $this->getParserInstance()->parseFile($filename)->getPages();
        $commands = $pages[0]->getDataCommands();
        $this->assertCount(168, $commands);

        // Tm command: exactly the keys 't', 'o' and 'c'.
        $tm = $commands[1];
        $this->assertCount(3, $tm);
        foreach (['t', 'o', 'c'] as $key) {
            $this->assertArrayHasKey($key, $tm);
        }

        $this->assertStringContainsString('Tm', $tm['o']);
        $this->assertStringContainsString('0.999429 0 0 1 201.96 720.68', $tm['c']);

        // TJ command: same three keys, 'c' holds a list of sub-entries.
        $tj = $commands[2];
        $this->assertCount(3, $tj);
        foreach (['t', 'o', 'c'] as $key) {
            $this->assertArrayHasKey($key, $tj);
        }

        $this->assertStringContainsString('TJ', $tj['o']);

        $parts = $tj['c'];
        $this->assertStringContainsString('(', $parts[0]['t']);
        $this->assertStringContainsString('D', $parts[0]['c']);
        $this->assertStringContainsString('n', $parts[1]['t']);
        $this->assertStringContainsString('0.325008', $parts[1]['c']);
        $this->assertStringContainsString('(', $parts[2]['t']);
        $this->assertStringContainsString('o', $parts[2]['c']);
    }
284
285
    /**
     * Page::getDataTm() must return the expected [matrix, text] pairs for a
     * plain text sample and for two filled-in invoice forms of the same type.
     *
     * The two invoice samples are expected to share entry count and matrices;
     * only the text differs.
     */
    public function testGetDataTm()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        $dataTm = $page->getDataTm();
        $this->assertCount(81, $dataTm);

        $item = $dataTm[0];
        $this->assertCount(2, $item);
        $this->assertCount(6, $item[0]);
        $this->assertDataTmItem(
            ['0.999429', '0', '0', '1', '201.96', '720.68'],
            'Document title',
            $item
        );
        $this->assertDataTmItem(
            ['0.999402', '0', '0', '1', '70.8', '673.64'],
            'Calibri : Lorem ipsum dolor sit amet, consectetur a',
            $dataTm[2]
        );
        $this->assertDataTmItem(
            ['0.999402', '0', '0', '1', '343.003', '81.44'],
            'nenatis.',
            $dataTm[80]
        );

        // ------------------------------------------------------
        // Documents are two forms of the same type: sample file => texts
        // expected at the entries with index 2, 6 and 8.
        $invoices = [
            'SimpleInvoiceFilledExample1.pdf' => ['MyName  MyLastName', '1/1/2020', 'Purchase 1'],
            'SimpleInvoiceFilledExample2.pdf' => ["Other'sName  Other'sLastName", '2/2/2020', 'Purchase 2'],
        ];

        foreach ($invoices as $sample => $texts) {
            $document = $parser->parseFile($this->rootDir.'/samples/'.$sample);
            $pages = $document->getPages();
            $page = $pages[0];
            $dataTm = $page->getDataTm();

            // Check the size before indexing so a short result fails with a
            // clear assertion message instead of an undefined-offset error.
            $this->assertCount(105, $dataTm);

            $item = $dataTm[2];
            $this->assertCount(2, $item);
            $this->assertCount(6, $item[0]);
            $this->assertDataTmItem(['1', '0', '0', '1', '167.3', '894.58'], $texts[0], $item);
            $this->assertDataTmItem(['1', '0', '0', '1', '681.94', '877.42'], $texts[1], $dataTm[6]);
            $this->assertDataTmItem(['1', '0', '0', '1', '174.86', '827.14'], $texts[2], $dataTm[8]);
        }
    }

    /**
     * Asserts that one getDataTm() entry has the given matrix at index 0 and
     * contains the given text at index 1.
     *
     * @param array  $expectedMatrix six matrix values, compared with assertEquals
     * @param string $expectedText   substring expected inside the entry's text
     * @param array  $item           entry taken from the getDataTm() result
     */
    private function assertDataTmItem(array $expectedMatrix, $expectedText, array $item)
    {
        $this->assertEquals($expectedMatrix, $item[0]);
        $this->assertStringContainsString($expectedText, $item[1]);
    }
447
448
    /**
     * Tests getDataTm with hexadecimal encoded document text.
     *
     * The sample is expected to yield 13 entries; the one at index 2 must
     * carry the matrix below and the decoded text "Lorem".
     *
     * @see https://github.com/smalot/pdfparser/issues/336
     */
    public function testGetDataTmIssue336()
    {
        $filename = $this->rootDir.'/samples/bugs/Issue336_decode_hexadecimal.pdf';
        $document = $this->getParserInstance()->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $dataTm = $page->getDataTm();

        // Check the size before indexing so a short result fails with a clear
        // assertion message instead of an undefined-offset error.
        $this->assertCount(13, $dataTm);

        $item = $dataTm[2];
        $this->assertCount(2, $item);
        $this->assertCount(6, $item[0]);
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '318.185',
                '665.044',
            ],
            $item[0]
        );
        $this->assertEquals('Lorem', $item[1]);
    }
478
479
    /**
     * Tests that getPages() only returns Page objects
     *
     * @see https://github.com/smalot/pdfparser/issues/331
     *
     * Sample pdf file provided by @Reqrefusion, see
     * https://github.com/smalot/pdfparser/pull/350#issuecomment-703195220
     */
    public function testGetPages()
    {
        $filename = $this->rootDir.'/samples/bugs/Issue331.pdf';
        $document = $this->getParserInstance()->parseFile($filename);
        $pages = $document->getPages();

        // This should actually be 3 pages, but as long as the cause for issue #331
        // has not been found and the issue is not fixed, we'll settle for 2 here.
        // We still test for the count, so in case the bug should be fixed
        // unknowingly, we don't forget to resolve the issue as well and make sure
        // this assertion is present.
        $this->assertCount(2, $pages);

        // Dedicated assertion: on failure it reports the offending element's type.
        $this->assertContainsOnlyInstancesOf(Page::class, $pages);
    }
504
505
    /**
     * Exercises Page::getTextXY() on a plain text sample and on two filled-in
     * invoice forms, with and without the two extra arguments (1, 1).
     */
    public function testGetTextXY()
    {
        $parser = $this->getParserInstance();

        // Matrices expected for the title of the text sample and for the name
        // field of both invoice samples.
        $titleMatrix = ['0.999429', '0', '0', '1', '201.96', '720.68'];
        $nameMatrix = ['1', '0', '0', '1', '167.3', '894.58'];

        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $pages = $parser->parseFile($filename)->getPages();
        $page = $pages[0];

        // Exact coordinates: one hit.
        $hits = $page->getTextXY(201.96, 720.68);
        $this->assertCount(1, $hits);
        $this->assertCount(2, $hits[0]);
        $this->assertEquals($titleMatrix, $hits[0][0]);
        $this->assertStringContainsString('Document title', $hits[0][1]);

        // Truncated coordinates without the extra arguments: no hit.
        $hits = $page->getTextXY(201, 720);
        $this->assertCount(0, $hits);

        // Truncated coordinates with the extra arguments (1, 1): the title again.
        $hits = $page->getTextXY(201, 720, 1, 1);
        $this->assertCount(1, $hits);
        $this->assertCount(2, $hits[0]);
        $this->assertEquals($titleMatrix, $hits[0][0]);
        $this->assertStringContainsString('Document title', $hits[0][1]);

        // ------------------------------------------------------
        // Document is a form
        $filename = $this->rootDir.'/samples/SimpleInvoiceFilledExample1.pdf';
        $pages = $parser->parseFile($filename)->getPages();
        $page = $pages[0];

        $hits = $page->getTextXY(167, 894, 1, 1);
        $this->assertCount(1, $hits);
        $this->assertCount(2, $hits[0]);
        $this->assertEquals($nameMatrix, $hits[0][0]);
        $this->assertStringContainsString('MyName  MyLastName', $hits[0][1]);

        $hits = $page->getTextXY(681, 877, 1, 1);
        $this->assertStringContainsString('1/1/2020', $hits[0][1]);

        $hits = $page->getTextXY(174, 827, 1, 1);
        $this->assertStringContainsString('Purchase 1', $hits[0][1]);

        // ------------------------------------------------------
        // Document is another form of the same type
        $filename = $this->rootDir.'/samples/SimpleInvoiceFilledExample2.pdf';
        $pages = $parser->parseFile($filename)->getPages();
        $page = $pages[0];

        $hits = $page->getTextXY(167, 894, 1, 1);
        $this->assertEquals($nameMatrix, $hits[0][0]);
        $this->assertStringContainsString("Other'sName  Other'sLastName", $hits[0][1]);

        $hits = $page->getTextXY(681, 877, 1, 1);
        $this->assertStringContainsString('2/2/2020', $hits[0][1]);

        $hits = $page->getTextXY(174, 827, 1, 1);
        $this->assertStringContainsString('Purchase 2', $hits[0][1]);
    }
602
603
    /**
     * Page::extractDecodedRawData() on the Issue450 sample: the entry at index 3
     * must be a TJ command whose first sub-entry holds the signature placeholder.
     */
    public function testExtractDecodedRawDataIssue450()
    {
        $filename = $this->rootDir.'/samples/bugs/Issue450.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();

        $this->assertIsArray($extractedDecodedRawData);
        // More than 3 entries are required before index 3 can be inspected.
        $this->assertGreaterThan(3, \count($extractedDecodedRawData));

        $tjItem = $extractedDecodedRawData[3];
        $this->assertIsArray($tjItem);
        $this->assertEquals('TJ', $tjItem['o']);
        $this->assertIsArray($tjItem['c']);
        $this->assertIsArray($tjItem['c'][0]);
        // assertCount reports the actual size on failure.
        $this->assertCount(3, $tjItem['c'][0]);
        $this->assertEquals('{signature:signer505906:Please+Sign+Here}', $tjItem['c'][0]['c']);
    }
620
621
    /**
     * Page::getDataTm() on the Issue450 sample must return a single entry made
     * of a six-value matrix and the signature placeholder text.
     */
    public function testGetDataTmIssue450()
    {
        $filename = $this->rootDir.'/samples/bugs/Issue450.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $dataTm = $page->getDataTm();

        // assertCount reports the actual size on failure, unlike
        // assertEquals(n, \count(...)).
        $this->assertIsArray($dataTm);
        $this->assertCount(1, $dataTm);
        $this->assertIsArray($dataTm[0]);
        $this->assertCount(2, $dataTm[0]);
        $this->assertIsArray($dataTm[0][0]);
        $this->assertCount(6, $dataTm[0][0]);

        // Element-wise assertEquals keeps the original loose numeric comparison.
        foreach ([1, 0, 0, 1, 67.5, 756.25] as $index => $expected) {
            $this->assertEquals($expected, $dataTm[0][0][$index]);
        }

        $this->assertEquals('{signature:signer505906:Please+Sign+Here}', $dataTm[0][1]);
    }
643
}
644