Passed
Push — feature/switch-to-phpunit ( 82614e )
by Konrad
03:39
created

PageTest   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 423
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 9
eloc 291
c 1
b 0
f 0
dl 0
loc 423
rs 10

8 Methods

Rating   Name   Duplication   Size   Complexity  
A testGetDataCommands() 0 33 1
A testGetFont() 0 15 1
A testGetText() 0 19 1
A testExtractDecodedRawData() 0 33 1
A testExtractRawData() 0 20 1
B testGetTextXY() 0 94 1
A testGetFonts() 0 32 2
B testGetDataTm() 0 161 1
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @license LGPLv3
10
 * @url     <https://github.com/smalot/pdfparser>
11
 *
12
 *  PdfParser is a pdf library written in PHP, extraction oriented.
13
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
14
 *
15
 *  This program is free software: you can redistribute it and/or modify
16
 *  it under the terms of the GNU Lesser General Public License as published by
17
 *  the Free Software Foundation, either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  This program is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU Lesser General Public License for more details.
24
 *
25
 *  You should have received a copy of the GNU Lesser General Public License
26
 *  along with this program.
27
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
28
 */
29
30
namespace Tests\Smalot\PdfParser\Integration;
31
32
use Smalot\PdfParser\Font;
33
use Smalot\PdfParser\Test\TestCase;
34
35
class PageTest extends TestCase
36
{
37
    /**
     * Checks Page::getFonts() on a page that contains text and on one that does not.
     *
     * getFonts() is called twice per page: according to the original comments the
     * first call loads the data and the second one is expected to use the cache.
     */
    public function testGetFonts()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));
        foreach ($fonts as $font) {
            // assertInstanceOf reports the actual class on failure, unlike assertTrue(... instanceof ...).
            $this->assertInstanceOf(Font::class, $font);
        }
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));

        // ------------------------------------------------------
        // Document without text.
        $filename = $this->rootDir.'/samples/Document3_pdfcreator_nocompressed.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
    }
70
71
    /**
     * Checks that Page::getFont() returns a Font instance for the ids 'R7' and 'ABC7'.
     *
     * NOTE(review): 'ABC7' looks like an id that is not defined in the sample
     * document, which would mean getFont() falls back to some Font object for
     * unknown ids - confirm against Page::getFont().
     */
    public function testGetFont()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $font = $page->getFont('R7');
        // assertInstanceOf reports the actual type on failure, unlike assertTrue(... instanceof ...).
        $this->assertInstanceOf(Font::class, $font);

        $font = $page->getFont('ABC7');
        $this->assertInstanceOf(Font::class, $font);
    }
87
88
    /**
     * Checks that Page::getText() returns the expected text of the first page
     * of the sample document: more than 150 bytes long and containing the title,
     * the filler text and the font names used as paragraph labels.
     */
    public function testGetText()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $text = $page->getText();

        // assertGreaterThan prints both values on failure instead of "false is not true".
        $this->assertGreaterThan(150, \strlen($text));

        // NOTE(review): assertContains() with a string haystack is deprecated in
        // PHPUnit 8 and removed in PHPUnit 9; move to assertStringContainsString()
        // once the minimum PHPUnit version is confirmed to be >= 7.5.
        $this->assertContains('Document title', $text);
        $this->assertContains('Lorem ipsum', $text);

        $this->assertContains('Calibri', $text);
        $this->assertContains('Arial', $text);
        $this->assertContains('Times', $text);
        $this->assertContains('Courier New', $text);
        $this->assertContains('Verdana', $text);
    }
108
109
    /**
     * Checks the result of Page::extractRawData() for the first page of the
     * sample document: 172 entries, where entry 1 is a 'Tm' command made of the
     * three keys 't', 'o' and 'c'.
     */
    public function testExtractRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedRawData = $page->extractRawData();

        // Assert the size before reading index 1, so that a short result fails
        // with a count mismatch instead of an undefined-offset error.
        $this->assertCount(172, $extractedRawData);

        $tmItem = $extractedRawData[1];
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);
    }
130
131
    /**
     * Checks the result of Page::extractDecodedRawData() for the first page of
     * the sample document: 172 entries, entry 1 being a 'Tm' command with the
     * keys 't', 'o' and 'c', and entry 2 being a 'TJ' command whose 'c' value is
     * a list of parts that each carry their own 't' and 'c' keys.
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();

        // Assert the size before reading by index, so that a short result fails
        // with a count mismatch instead of an undefined-offset error.
        $this->assertCount(172, $extractedDecodedRawData);

        $tmItem = $extractedDecodedRawData[1];
        $this->assertCount(3, $tmItem);

        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

        // NOTE(review): a second, verbatim copy of the $tmItem count/key
        // assertions used to follow here. It may have been intended for $tjItem
        // (testGetDataCommands checks its TJ item that way) - confirm before
        // adding such assertions.
        $tjItem = $extractedDecodedRawData[2];
        $this->assertContains('TJ', $tjItem['o']);
        $this->assertContains('(', $tjItem['c'][0]['t']);
        $this->assertContains('D', $tjItem['c'][0]['c']);
        $this->assertContains('n', $tjItem['c'][1]['t']);
        $this->assertContains('0.325008', $tjItem['c'][1]['c']);
        $this->assertContains('(', $tjItem['c'][2]['t']);
        $this->assertContains('o', $tjItem['c'][2]['c']);
    }
165
166
    /**
     * Checks the result of Page::getDataCommands() for the first page of the
     * sample document: 166 commands, the first being a 'Tm' command and the
     * second a 'TJ' command, each made of the keys 't', 'o' and 'c'.
     */
    public function testGetDataCommands()
    {
        // Document with text.
        $samplePath = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $this->getParserInstance()->parseFile($samplePath)->getPages()[0];

        $commands = $firstPage->getDataCommands();
        $this->assertCount(166, $commands);

        // First command: the 'Tm' operator with its six operands as one string.
        $tmCommand = $commands[0];
        $this->assertCount(3, $tmCommand);
        foreach (['t', 'o', 'c'] as $key) {
            $this->assertArrayHasKey($key, $tmCommand);
        }

        $this->assertContains('Tm', $tmCommand['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmCommand['c']);

        // Second command: the 'TJ' operator, whose 'c' value is a list of parts.
        $tjCommand = $commands[1];
        $this->assertCount(3, $tjCommand);
        foreach (['t', 'o', 'c'] as $key) {
            $this->assertArrayHasKey($key, $tjCommand);
        }

        $this->assertContains('TJ', $tjCommand['o']);

        $parts = $tjCommand['c'];
        $this->assertContains('(', $parts[0]['t']);
        $this->assertContains('D', $parts[0]['c']);
        $this->assertContains('n', $parts[1]['t']);
        $this->assertContains('0.325008', $parts[1]['c']);
        $this->assertContains('(', $parts[2]['t']);
        $this->assertContains('o', $parts[2]['c']);
    }
200
201
    /**
     * Checks Page::getDataTm() against three sample documents.
     *
     * Each entry of the result is asserted to be a pair: index 0 holds six
     * values given as strings (presumably the operands of the 'Tm' text matrix
     * - confirm against Page::getDataTm()) and index 1 holds the related text.
     */
    public function testGetDataTm()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        $dataTm = $page->getDataTm();
        $this->assertCount(81, $dataTm);

        $item = $dataTm[0];
        $this->assertCount(2, $item);
        $this->assertCount(6, $item[0]);
        $this->assertEquals(
            [
                '0.999429',
                '0',
                '0',
                '1',
                '201.96',
                '720.68',
            ],
            $item[0]
        );

        $this->assertContains('Document title', $item[1]);
        $item = $dataTm[2];
        $this->assertEquals(
            [
                '0.999402',
                '0',
                '0',
                '1',
                '70.8',
                '673.64',
            ],
            $item[0]
        );

        $this->assertContains('Calibri : Lorem ipsum dolor sit amet, consectetur a', $item[1]);

        $item = $dataTm[80];
        $this->assertEquals(
            [
                '0.999402',
                '0',
                '0',
                '1',
                '343.003',
                '81.44',
            ],
            $item[0]
        );
        $this->assertContains('nenatis.', $item[1]);

        // ------------------------------------------------------
        // Document is a form
        $filename = $this->rootDir.'/samples/SimpleInvoiceFilledExample1.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $dataTm = $page->getDataTm();

        // Assert the size before reading by index, so that a short result fails
        // with a count mismatch instead of an undefined-offset error.
        $this->assertCount(105, $dataTm);

        $item = $dataTm[2];
        $this->assertCount(2, $item);
        $this->assertCount(6, $item[0]);
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '167.3',
                '894.58',
            ],
            $item[0]
        );
        $this->assertContains('MyName  MyLastName', $item[1]);

        $item = $dataTm[6];
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '681.94',
                '877.42',
            ],
            $item[0]
        );
        $this->assertContains('1/1/2020', $item[1]);

        $item = $dataTm[8];
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '174.86',
                '827.14',
            ],
            $item[0]
        );
        $this->assertContains('Purchase 1', $item[1]);

        // ------------------------------------------------------
        // Document is another form of the same type
        $filename = $this->rootDir.'/samples/SimpleInvoiceFilledExample2.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $dataTm = $page->getDataTm();

        // Same ordering as above: size first, then index access.
        $this->assertCount(105, $dataTm);

        $item = $dataTm[2];
        $this->assertCount(2, $item);
        $this->assertCount(6, $item[0]);
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '167.3',
                '894.58',
            ],
            $item[0]
        );
        $this->assertContains("Other'sName  Other'sLastName", $item[1]);

        $item = $dataTm[6];
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '681.94',
                '877.42',
            ],
            $item[0]
        );
        $this->assertContains('2/2/2020', $item[1]);

        $item = $dataTm[8];
        $this->assertEquals(
            [
                '1',
                '0',
                '0',
                '1',
                '174.86',
                '827.14',
            ],
            $item[0]
        );
        $this->assertContains('Purchase 2', $item[1]);
    }
363
364
    /**
     * Checks Page::getTextXY() against three sample documents.
     *
     * Every hit is a pair: index 0 holds six values given as strings and index 1
     * the text found. The optional third and fourth arguments are presumably
     * x/y tolerances (confirm against Page::getTextXY()): the lookup at
     * (201, 720) finds nothing without them and one entry with 1, 1.
     */
    public function testGetTextXY()
    {
        $titleMatrix = ['0.999429', '0', '0', '1', '201.96', '720.68'];
        $nameMatrix = ['1', '0', '0', '1', '167.3', '894.58'];

        // Document with text.
        $samplePath = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];

        // Exact coordinates.
        $hits = $firstPage->getTextXY(201.96, 720.68);
        $this->assertCount(1, $hits);
        $this->assertCount(2, $hits[0]);
        $this->assertEquals($titleMatrix, $hits[0][0]);
        $this->assertContains('Document title', $hits[0][1]);

        // Truncated coordinates without the extra arguments: nothing is found.
        $hits = $firstPage->getTextXY(201, 720);
        $this->assertCount(0, $hits);

        // Truncated coordinates with the extra arguments: the title is found again.
        $hits = $firstPage->getTextXY(201, 720, 1, 1);
        $this->assertCount(1, $hits);
        $this->assertCount(2, $hits[0]);
        $this->assertEquals($titleMatrix, $hits[0][0]);
        $this->assertContains('Document title', $hits[0][1]);

        // ------------------------------------------------------
        // Two filled forms of the same type: same positions, different values.
        // Each row is: sample file => [name text, date text, purchase text].
        $forms = [
            '/samples/SimpleInvoiceFilledExample1.pdf' => [
                'MyName  MyLastName',
                '1/1/2020',
                'Purchase 1',
            ],
            '/samples/SimpleInvoiceFilledExample2.pdf' => [
                "Other'sName  Other'sLastName",
                '2/2/2020',
                'Purchase 2',
            ],
        ];

        foreach ($forms as $relativePath => $expectedTexts) {
            $formPage = $parser->parseFile($this->rootDir.$relativePath)->getPages()[0];

            $hits = $formPage->getTextXY(167, 894, 1, 1);
            $this->assertEquals($nameMatrix, $hits[0][0]);
            $this->assertContains($expectedTexts[0], $hits[0][1]);

            $hits = $formPage->getTextXY(681, 877, 1, 1);
            $this->assertContains($expectedTexts[1], $hits[0][1]);

            $hits = $formPage->getTextXY(174, 827, 1, 1);
            $this->assertContains($expectedTexts[2], $hits[0][1]);
        }
    }
459
}
460