Completed
Pull Request — master (#300)
by Konrad
13:31 queued 09:23
created

PageTest::testGetTextXY()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 94
Code Lines 67

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 67
c 1
b 0
f 0
dl 0
loc 94
rs 8.72
cc 1
nc 1
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @author  Sébastien MALOT <[email protected]>
10
 * @date    2017-01-03
11
 *
12
 * @license LGPLv3
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Tests\Smalot\PdfParser\Integration;
34
35
use Smalot\PdfParser\Font;
36
use Test\Smalot\PdfParser\TestCase;
37
38
class PageTest extends TestCase
39
{
40
    /**
     * Checks Page::getFonts() on a page with text (at least one Font instance)
     * and on a page without text (no fonts), calling it twice in each case.
     */
    public function testGetFonts()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));
        foreach ($fonts as $font) {
            $this->assertInstanceOf(Font::class, $font);
        }
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertGreaterThan(0, \count($fonts));

        // ------------------------------------------------------
        // Document without text.
        $filename = $this->rootDir.'/samples/Document3_pdfcreator_nocompressed.pdf';
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        // the first to load data.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
        // the second to use cache.
        $fonts = $page->getFonts();
        $this->assertCount(0, $fonts);
    }
73
74
    /**
     * Checks that Page::getFont() returns a Font instance for the ids 'R7'
     * and 'ABC7' on the first page of the sample document.
     */
    public function testGetFont()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        $font = $page->getFont('R7');
        $this->assertInstanceOf(Font::class, $font);

        // NOTE(review): 'ABC7' looks like an id that does not exist in the
        // document; if so, this pins that a Font is still returned - confirm.
        $font = $page->getFont('ABC7');
        $this->assertInstanceOf(Font::class, $font);
    }
90
91
    /**
     * Checks that Page::getText() returns more than 150 bytes of text for the
     * first page of the sample document and that it contains the title, the
     * filler text and the font names printed in the document.
     */
    public function testGetText()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $text = $page->getText();

        $this->assertGreaterThan(150, \strlen($text));
        $this->assertContains('Document title', $text);
        $this->assertContains('Lorem ipsum', $text);

        $this->assertContains('Calibri', $text);
        $this->assertContains('Arial', $text);
        $this->assertContains('Times', $text);
        $this->assertContains('Courier New', $text);
        $this->assertContains('Verdana', $text);
    }
111
112
    /**
     * Checks the shape of Page::extractRawData() for the first page of the
     * sample document: 172 entries, of which entry 1 is a 'Tm' command with
     * the keys 't', 'o' and 'c'.
     */
    public function testExtractRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedRawData = $page->extractRawData();
        $tmItem = $extractedRawData[1];

        $this->assertCount(172, $extractedRawData);
        $this->assertCount(3, $tmItem);

        // NOTE(review): static analysis (Scrutinizer) reported that $tmItem may
        // be typed Countable&Traversable here, while assertArrayHasKey() accepts
        // array|ArrayAccess. If this is a false positive it can be silenced with
        // the scrutinizer "ignore-type" annotation on the argument.
        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);
    }
133
134
    /**
     * Checks the shape of Page::extractDecodedRawData() for the first page of
     * the sample document: 172 entries, entry 1 being a 'Tm' command and
     * entry 2 a 'TJ' command whose 'c' value is a list of typed fragments.
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();
        $tmItem = $extractedDecodedRawData[1];
        $this->assertCount(172, $extractedDecodedRawData);
        $this->assertCount(3, $tmItem);

        // NOTE(review): static analysis (Scrutinizer) reported that $tmItem may
        // be typed Countable&Traversable here, while assertArrayHasKey() accepts
        // array|ArrayAccess. If this is a false positive it can be silenced with
        // the scrutinizer "ignore-type" annotation on the argument.
        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

        // NOTE(review): a second, identical copy of the count/key assertions on
        // $tmItem used to follow here and has been dropped as redundant. It was
        // possibly meant to target $tjItem (as testGetDataCommands does) -
        // confirm before adding such checks.
        $tjItem = $extractedDecodedRawData[2];
        $this->assertContains('TJ', $tjItem['o']);
        $this->assertContains('(', $tjItem['c'][0]['t']);
        $this->assertContains('D', $tjItem['c'][0]['c']);
        $this->assertContains('n', $tjItem['c'][1]['t']);
        $this->assertContains('0.325008', $tjItem['c'][1]['c']);
        $this->assertContains('(', $tjItem['c'][2]['t']);
        $this->assertContains('o', $tjItem['c'][2]['c']);
    }
168
169
    /**
     * Checks Page::getDataCommands() for the first page of the sample
     * document: 166 commands, command 0 being 'Tm' and command 1 being 'TJ',
     * each with the keys 't', 'o' and 'c'.
     */
    public function testGetDataCommands()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $dataCommands = $page->getDataCommands();
        $this->assertCount(166, $dataCommands);

        $tmItem = $dataCommands[0];
        $this->assertCount(3, $tmItem);
        // NOTE(review): static analysis (Scrutinizer) reported that $tmItem may
        // be typed Countable&Traversable here, while assertArrayHasKey() accepts
        // array|ArrayAccess. If this is a false positive it can be silenced with
        // the scrutinizer "ignore-type" annotation on the argument.
        $this->assertArrayHasKey('t', $tmItem);
        $this->assertArrayHasKey('o', $tmItem);
        $this->assertArrayHasKey('c', $tmItem);

        $this->assertContains('Tm', $tmItem['o']);
        $this->assertContains('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

        $tjItem = $dataCommands[1];
        $this->assertCount(3, $tjItem);
        $this->assertArrayHasKey('t', $tjItem);
        $this->assertArrayHasKey('o', $tjItem);
        $this->assertArrayHasKey('c', $tjItem);

        $this->assertContains('TJ', $tjItem['o']);
        $this->assertContains('(', $tjItem['c'][0]['t']);
        $this->assertContains('D', $tjItem['c'][0]['c']);
        $this->assertContains('n', $tjItem['c'][1]['t']);
        $this->assertContains('0.325008', $tjItem['c'][1]['c']);
        $this->assertContains('(', $tjItem['c'][2]['t']);
        $this->assertContains('o', $tjItem['c'][2]['c']);
    }
203
204
    /**
     * Checks Page::getDataTm() against known entries of three sample
     * documents: a plain text document and two filled-in copies of the same
     * invoice form.
     */
    public function testGetDataTm()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        $dataTm = $page->getDataTm();
        $this->assertCount(81, $dataTm);

        $item = $dataTm[0];
        $this->assertCount(2, $item);
        $this->assertCount(6, $item[0]);
        $this->assertDataTmItem(
            ['0.999429', '0', '0', '1', '201.96', '720.68'],
            'Document title',
            $item
        );
        $this->assertDataTmItem(
            ['0.999402', '0', '0', '1', '70.8', '673.64'],
            'Calibri : Lorem ipsum dolor sit amet, consectetur a',
            $dataTm[2]
        );
        $this->assertDataTmItem(
            ['0.999402', '0', '0', '1', '343.003', '81.44'],
            'nenatis.',
            $dataTm[80]
        );

        // ------------------------------------------------------
        // Two filled-in copies of the same form: the entries sit at the same
        // indexes with the same matrices, only the text differs.
        $forms = [
            'SimpleInvoiceFilledExample1.pdf' => ['MyName  MyLastName', '1/1/2020', 'Purchase 1'],
            'SimpleInvoiceFilledExample2.pdf' => ["Other'sName  Other'sLastName", '2/2/2020', 'Purchase 2'],
        ];
        foreach ($forms as $sample => $texts) {
            $document = $parser->parseFile($this->rootDir.'/samples/'.$sample);
            $pages = $document->getPages();
            $page = $pages[0];
            $dataTm = $page->getDataTm();

            // Check the overall size first so a short result fails with a
            // count mismatch instead of an undefined-offset error.
            $this->assertCount(105, $dataTm, $sample);
            $item = $dataTm[2];
            $this->assertCount(2, $item, $sample);
            $this->assertCount(6, $item[0], $sample);

            $this->assertDataTmItem(['1', '0', '0', '1', '167.3', '894.58'], $texts[0], $item, $sample);
            $this->assertDataTmItem(['1', '0', '0', '1', '681.94', '877.42'], $texts[1], $dataTm[6], $sample);
            $this->assertDataTmItem(['1', '0', '0', '1', '174.86', '827.14'], $texts[2], $dataTm[8], $sample);
        }
    }

    /**
     * Asserts that one getDataTm() entry carries the expected matrix at
     * index 0 and contains the expected text at index 1.
     *
     * @param array  $expectedMatrix the six matrix values, as strings
     * @param string $expectedText   text that must be contained in the entry
     * @param mixed  $item           one entry of the getDataTm() result
     * @param string $message        optional failure message (e.g. the sample name)
     */
    private function assertDataTmItem(array $expectedMatrix, $expectedText, $item, $message = '')
    {
        $this->assertEquals($expectedMatrix, $item[0], $message);
        $this->assertContains($expectedText, $item[1], $message);
    }
366
367
    /**
     * Checks Page::getTextXY() against known positions in three sample
     * documents: a plain text document and two filled-in copies of the same
     * invoice form.
     */
    public function testGetTextXY()
    {
        // Document with text.
        $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = $this->getParserInstance();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];

        $titleMatrix = ['0.999429', '0', '0', '1', '201.96', '720.68'];

        // Exact coordinates: exactly one hit.
        $result = $page->getTextXY(201.96, 720.68);
        $this->assertCount(1, $result);
        $this->assertCount(2, $result[0]);
        $this->assertTextXYItem($titleMatrix, 'Document title', $result[0]);

        // Truncated coordinates without further arguments: no hit.
        $result = $page->getTextXY(201, 720);
        $this->assertCount(0, $result);

        // Truncated coordinates with the 3rd and 4th argument set to 1: the
        // same hit again. NOTE(review): presumably these are x/y tolerances -
        // confirm against Page::getTextXY().
        $result = $page->getTextXY(201, 720, 1, 1);
        $this->assertCount(1, $result);
        $this->assertCount(2, $result[0]);
        $this->assertTextXYItem($titleMatrix, 'Document title', $result[0]);

        // ------------------------------------------------------
        // Two filled-in copies of the same form: the text is looked up at
        // the same coordinates, only the content differs.
        $forms = [
            'SimpleInvoiceFilledExample1.pdf' => ['MyName  MyLastName', '1/1/2020', 'Purchase 1'],
            'SimpleInvoiceFilledExample2.pdf' => ["Other'sName  Other'sLastName", '2/2/2020', 'Purchase 2'],
        ];
        foreach ($forms as $sample => $texts) {
            $document = $parser->parseFile($this->rootDir.'/samples/'.$sample);
            $pages = $document->getPages();
            $page = $pages[0];

            $result = $page->getTextXY(167, 894, 1, 1);
            $this->assertTextXYItem(['1', '0', '0', '1', '167.3', '894.58'], $texts[0], $result[0], $sample);

            $result = $page->getTextXY(681, 877, 1, 1);
            $this->assertContains($texts[1], $result[0][1], $sample);

            $result = $page->getTextXY(174, 827, 1, 1);
            $this->assertContains($texts[2], $result[0][1], $sample);
        }
    }

    /**
     * Asserts that one getTextXY() hit carries the expected matrix at
     * index 0 and contains the expected text at index 1.
     *
     * @param array  $expectedMatrix the six matrix values, as strings
     * @param string $expectedText   text that must be contained in the hit
     * @param mixed  $item           one entry of the getTextXY() result
     * @param string $message        optional failure message (e.g. the sample name)
     */
    private function assertTextXYItem(array $expectedMatrix, $expectedText, $item, $message = '')
    {
        $this->assertEquals($expectedMatrix, $item[0], $message);
        $this->assertContains($expectedText, $item[1], $message);
    }
462
}
463