Completed
Pull Request — master (#324)
by Jeremy
11:37 queued 08:14
created

PageTest::testGetFontsElementMissing()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 27
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 19
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 27
rs 9.6333
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @author  Sébastien MALOT <[email protected]>
10
 * @date    2017-01-03
11
 *
12
 * @license LGPLv3
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Tests\Smalot\PdfParser\Integration;
34
35
use Smalot\PdfParser\Document;
36
use Smalot\PdfParser\Element\ElementMissing;
37
use Smalot\PdfParser\Font;
38
use Smalot\PdfParser\Page;
39
use Test\Smalot\PdfParser\TestCase;
40
41
class PageTest extends TestCase
42
{
43
    /**
     * Checks Page::getFonts() on the first page of two sample documents:
     * one that contains text (fonts expected) and one without text (no fonts expected).
     *
     * getFonts() is called twice on each page. Per the original inline comments the
     * first call loads the data and the second one is expected to be served from a cache;
     * both calls must report the same outcome.
     */
    public function testGetFonts()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        // Dedicated assertions report the actual count / type on failure,
        // whereas assertTrue() only reports "false is not true".
        $this->assertGreaterThan(0, \count($fonts));
        foreach ($fonts as $font) {
            $this->assertInstanceOf(Font::class, $font);
        }
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));

        // ------------------------------------------------------
        // Document without text.
        $filename = $this->rootDir.'/samples/Document3_pdfcreator_nocompressed.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
    }
76
77
    /**
     * Page::getFonts() must return an empty array when the header it looks up
     * reports an entry via has() but get() hands back an ElementMissing.
     *
     * Both headers are PHPUnit mocks of Smalot\PdfParser\Header built without
     * running the original constructor; each stubbed method must be hit exactly once.
     */
    public function testGetFontsElementMissing()
    {
        // Inner header stub: has() answers true, get() yields an ElementMissing.
        $resources = $this
            ->getMockBuilder('Smalot\PdfParser\Header')
            ->disableOriginalConstructor()
            ->getMock();
        $resources->expects($this->once())->method('has')->willReturn(true);
        $resources->expects($this->once())->method('get')->willReturn(new ElementMissing());

        // Outer (page) header stub: its single get() call returns the inner stub.
        $pageHeader = $this
            ->getMockBuilder('Smalot\PdfParser\Header')
            ->disableOriginalConstructor()
            ->getMock();
        $pageHeader->expects($this->once())->method('get')->willReturn($resources);

        $subject = new Page(new Document(), $pageHeader);
        $result = $subject->getFonts();

        $this->assertEmpty($result);
        $this->assertEquals([], $result);
    }
105
106
    /**
     * Checks that Page::getFont() returns a Font instance for the ids 'R7' and 'ABC7'
     * on the first page of the sample document.
     */
    public function testGetFont()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        // assertInstanceOf() reports the actual type on failure, unlike assertTrue().
        $font = $page->getFont('R7');
        $this->assertInstanceOf(Font::class, $font);

        $font = $page->getFont('ABC7');
        $this->assertInstanceOf(Font::class, $font);
    }
122
123
    /**
     * Checks that Page::getText() on the first page of the sample document returns
     * more than 150 bytes of text containing the title, the filler text and the
     * font names listed below.
     */
    public function testGetText()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $text = $page->getText();

        // assertGreaterThan() prints the actual length on failure.
        $this->assertGreaterThan(150, \strlen($text));

        // NOTE(review): assertContains() with a string haystack is deprecated as of
        // PHPUnit 8; switch to assertStringContainsString() once the minimum supported
        // PHPUnit version provides it - confirm against composer.json.
        $this->assertContains('Document title', $text);
        $this->assertContains('Lorem ipsum', $text);

        $this->assertContains('Calibri', $text);
        $this->assertContains('Arial', $text);
        $this->assertContains('Times', $text);
        $this->assertContains('Courier New', $text);
        $this->assertContains('Verdana', $text);
    }
143
144
    /**
     * Checks Page::extractRawData() on the first page of the sample document:
     * the number of extracted entries and the shape and content of the entry at index 1.
     */
    public function testExtractRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedRawData = $page->extractRawData();

        // Verify the size before indexing so that a short result fails with a
        // readable assertion message instead of an undefined-offset error.
        $this->assertCount(172, $extractedRawData);

        $tmItem = $extractedRawData[1];
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);
    }
165
166
    /**
     * Checks Page::extractDecodedRawData() on the first page of the sample document:
     * the number of entries, the entry at index 1 (operator 'Tm') and the decoded
     * fragments of the entry at index 2 (operator 'TJ').
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();

        // Verify the size before indexing so that a short result fails with a
        // readable assertion message instead of an undefined-offset error.
        $this->assertCount(172, $extractedDecodedRawData);

        $tmItem = $extractedDecodedRawData[1];
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

        $tjItem = $extractedDecodedRawData[2];
        // Guard the keys dereferenced below. (The original repeated the $tmItem
        // shape checks at this point, which verified nothing new.)
        $this->assertArrayHasKey('o', $tjItem);
        $this->assertArrayHasKey('c', $tjItem);

        $this->assertContains('TJ', $tjItem['o']);
        $this->assertContains('(', $tjItem['c'][0]['t']);
        $this->assertContains('D', $tjItem['c'][0]['c']);
        $this->assertContains('n', $tjItem['c'][1]['t']);
        $this->assertContains('0.325008', $tjItem['c'][1]['c']);
        $this->assertContains('(', $tjItem['c'][2]['t']);
        $this->assertContains('o', $tjItem['c'][2]['c']);
    }
200
201
    /**
     * Checks Page::getDataCommands() on the first page of the sample document:
     * the total number of commands, the shape and content of the first command
     * (operator 'Tm') and of the second command (operator 'TJ').
     */
    public function testGetDataCommands()
    {
        // Document with text.
        $samplePath = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $pdf = $this->getParserInstance()->parseFile($samplePath);
        $allPages = $pdf->getPages();
        $firstPage = $allPages[0];

        $commands = $firstPage->getDataCommands();
        $this->assertCount(166, $commands);

        $requiredKeys = ['t', 'o', 'c'];

        // First command: text matrix.
        $matrixCommand = $commands[0];
        $this->assertCount(3, $matrixCommand);
        foreach ($requiredKeys as $key) {
            $this->assertArrayHasKey($key, $matrixCommand);
        }
        $this->assertContains('Tm', $matrixCommand['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $matrixCommand['c']);

        // Second command: text showing, whose 'c' entry is a list of fragments.
        $showCommand = $commands[1];
        $this->assertCount(3, $showCommand);
        foreach ($requiredKeys as $key) {
            $this->assertArrayHasKey($key, $showCommand);
        }
        $this->assertContains('TJ', $showCommand['o']);

        // Expected 't' / 'c' needles of the first three fragments, in order.
        $expectedFragments = [
            ['t' => '(', 'c' => 'D'],
            ['t' => 'n', 'c' => '0.325008'],
            ['t' => '(', 'c' => 'o'],
        ];
        foreach ($expectedFragments as $position => $expected) {
            $this->assertContains($expected['t'], $showCommand['c'][$position]['t']);
            $this->assertContains($expected['c'], $showCommand['c'][$position]['c']);
        }
    }
235
236
    /**
     * Checks Page::getDataTm() on the first page of three sample files: a plain text
     * document and two filled-in invoice forms of the same layout.
     *
     * Every inspected entry is a pair: element 0 is compared against an expected list
     * of six strings, element 1 must contain an expected text needle.
     */
    public function testGetDataTm()
    {
        // Document with text.
        $samplePath = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $allPages = $parser->parseFile($samplePath)->getPages();
        $firstPage = $allPages[0];

        $tmEntries = $firstPage->getDataTm();
        $this->assertCount(81, $tmEntries);

        $entry = $tmEntries[0];
        $this->assertCount(2, $entry);
        $this->assertCount(6, $entry[0]);

        // index => [expected element 0, needle expected inside element 1]
        $expectedByIndex = [
            0 => [['0.999429', '0', '0', '1', '201.96', '720.68'], 'Document title'],
            2 => [
                ['0.999402', '0', '0', '1', '70.8', '673.64'],
                'Calibri : Lorem ipsum dolor sit amet, consectetur a',
            ],
            80 => [['0.999402', '0', '0', '1', '343.003', '81.44'], 'nenatis.'],
        ];
        foreach ($expectedByIndex as $index => $expected) {
            $entry = $tmEntries[$index];
            $this->assertEquals($expected[0], $entry[0]);
            $this->assertContains($expected[1], $entry[1]);
        }

        // ------------------------------------------------------
        // Two filled forms of the same type: identical positions, different texts.
        $forms = [
            '/samples/SimpleInvoiceFilledExample1.pdf' => [
                2 => 'MyName  MyLastName',
                6 => '1/1/2020',
                8 => 'Purchase 1',
            ],
            '/samples/SimpleInvoiceFilledExample2.pdf' => [
                2 => "Other'sName  Other'sLastName",
                6 => '2/2/2020',
                8 => 'Purchase 2',
            ],
        ];
        // Expected element 0 per inspected index; shared by both forms.
        $formPositions = [
            2 => ['1', '0', '0', '1', '167.3', '894.58'],
            6 => ['1', '0', '0', '1', '681.94', '877.42'],
            8 => ['1', '0', '0', '1', '174.86', '827.14'],
        ];

        foreach ($forms as $relativePath => $needles) {
            $allPages = $parser->parseFile($this->rootDir.$relativePath)->getPages();
            $firstPage = $allPages[0];
            $tmEntries = $firstPage->getDataTm();

            $entry = $tmEntries[2];
            $this->assertCount(105, $tmEntries);
            $this->assertCount(2, $entry);
            $this->assertCount(6, $entry[0]);

            foreach ($needles as $index => $needle) {
                $entry = $tmEntries[$index];
                $this->assertEquals($formPositions[$index], $entry[0]);
                $this->assertContains($needle, $entry[1]);
            }
        }
    }
398
399
    /**
     * Checks Page::getTextXY() on the first page of three sample files: a plain text
     * document and two filled-in invoice forms of the same layout.
     *
     * Covers a lookup with exact coordinates, a lookup with truncated coordinates that
     * yields nothing, and lookups with truncated coordinates plus two extra arguments
     * of 1 (presumably x/y tolerances - confirm against Page::getTextXY()).
     */
    public function testGetTextXY()
    {
        // Document with text.
        $samplePath = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $allPages = $parser->parseFile($samplePath)->getPages();
        $firstPage = $allPages[0];

        $titleMatrix = ['0.999429', '0', '0', '1', '201.96', '720.68'];

        // Exact coordinates.
        $matches = $firstPage->getTextXY(201.96, 720.68);
        $this->assertCount(1, $matches);
        $this->assertCount(2, $matches[0]);
        $this->assertEquals($titleMatrix, $matches[0][0]);
        $this->assertContains('Document title', $matches[0][1]);

        // Truncated coordinates without the extra arguments: nothing is found.
        $matches = $firstPage->getTextXY(201, 720);
        $this->assertCount(0, $matches);

        // Truncated coordinates with the extra arguments: the title is found again.
        $matches = $firstPage->getTextXY(201, 720, 1, 1);
        $this->assertCount(1, $matches);
        $this->assertCount(2, $matches[0]);
        $this->assertEquals($titleMatrix, $matches[0][0]);
        $this->assertContains('Document title', $matches[0][1]);

        // ------------------------------------------------------
        // Document is a form
        $nameMatrix = ['1', '0', '0', '1', '167.3', '894.58'];

        $samplePath = $this->rootDir.'/samples/SimpleInvoiceFilledExample1.pdf';
        $allPages = $parser->parseFile($samplePath)->getPages();
        $firstPage = $allPages[0];

        $matches = $firstPage->getTextXY(167, 894, 1, 1);
        $this->assertCount(1, $matches);
        $this->assertCount(2, $matches[0]);
        $this->assertEquals($nameMatrix, $matches[0][0]);
        $this->assertContains('MyName  MyLastName', $matches[0][1]);

        $matches = $firstPage->getTextXY(681, 877, 1, 1);
        $this->assertContains('1/1/2020', $matches[0][1]);

        $matches = $firstPage->getTextXY(174, 827, 1, 1);
        $this->assertContains('Purchase 1', $matches[0][1]);

        // ------------------------------------------------------
        // Document is another form of the same type
        $samplePath = $this->rootDir.'/samples/SimpleInvoiceFilledExample2.pdf';
        $allPages = $parser->parseFile($samplePath)->getPages();
        $firstPage = $allPages[0];

        $matches = $firstPage->getTextXY(167, 894, 1, 1);
        $this->assertEquals($nameMatrix, $matches[0][0]);
        $this->assertContains("Other'sName  Other'sLastName", $matches[0][1]);

        $matches = $firstPage->getTextXY(681, 877, 1, 1);
        $this->assertContains('2/2/2020', $matches[0][1]);

        $matches = $firstPage->getTextXY(174, 827, 1, 1);
        $this->assertContains('Purchase 2', $matches[0][1]);
    }
496
}
497