Completed
Push — master ( 6bc9dc...7f2d31 )
by Konrad
15s queued 12s
created

Page::testExtractDecodedRawData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 25
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
eloc 22
c 2
b 0
f 0
nc 1
nop 0
dl 0
loc 25
rs 9.568
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser\Tests\Units;
32
33
use mageekguy\atoum;
34
35
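/**
 * Class Page
 *
 * atoum unit tests covering font lookup, text extraction and raw/positioned
 * data extraction on the first page of the sample PDF documents.
 */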
38
class Page extends atoum\test
39
{
40
    /**
     * Exercises getFonts() on the first page of two sample documents.
     *
     * For the sample that contains text the font list must be non-empty and
     * made of Font instances; for the sample without text it must be empty.
     * Each page is queried twice: per the original test notes, the first call
     * loads the data and the second one is expected to be answered from cache.
     */
    public function testGetFonts()
    {
        $parser = new \Smalot\PdfParser\Parser();

        // ------------------------------------------------------
        // Sample with text.
        $textSample = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $textPage = $parser->parseFile($textSample)->getPages()[0];

        // First call: data gets loaded.
        $loaded = $textPage->getFonts();
        $this->assert->array($loaded)->isNotEmpty();
        foreach ($loaded as $entry) {
            $this->assert->object($entry)->isInstanceOf('\Smalot\PdfParser\Font');
        }

        // Second call: expected to hit the cache.
        $cached = $textPage->getFonts();
        $this->assert->array($cached)->isNotEmpty();

        // ------------------------------------------------------
        // Sample without text.
        $emptySample = __DIR__.'/../../../../../samples/Document3_pdfcreator_nocompressed.pdf';
        $emptyPage = $parser->parseFile($emptySample)->getPages()[0];

        // First call: data gets loaded.
        $loaded = $emptyPage->getFonts();
        $this->assert->array($loaded)->isEmpty();

        // Second call: expected to hit the cache.
        $cached = $emptyPage->getFonts();
        $this->assert->array($cached)->isEmpty();
    }
73
74
    /**
     * Verifies that getFont() on the first page of the text sample returns a
     * Font instance for the id 'R7' as well as for the id 'ABC7'.
     */
    public function testGetFont()
    {
        $parser = new \Smalot\PdfParser\Parser();
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];

        // Both lookups must produce a Font object, checked in this order.
        foreach (['R7', 'ABC7'] as $fontId) {
            $resolved = $firstPage->getFont($fontId);
            $this->assert->object($resolved)->isInstanceOf('\Smalot\PdfParser\Font');
        }
    }
89
90
    /**
     * Checks the text extracted from the first page of the text sample: it
     * must be longer than 150 characters and contain the document title, the
     * filler text and the names of the fonts used in the document.
     */
    public function testGetText()
    {
        $parser = new \Smalot\PdfParser\Parser();
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];
        $content = $firstPage->getText();

        $this->assert->string($content)->hasLengthGreaterThan(150);

        // Fragments that must all appear in the extracted text.
        $expectedFragments = [
            'Document title',
            'Lorem ipsum',
            'Calibri',
            'Arial',
            'Times',
            'Courier New',
            'Verdana',
        ];
        foreach ($expectedFragments as $fragment) {
            $this->assert->string($content)->contains($fragment);
        }
    }
110
111
    /**
     * Checks the raw command list returned by extractRawData() for the first
     * page of the text sample: 172 entries overall, and entry 1 is a `Tm`
     * command made of exactly the keys 't', 'o' and 'c'.
     */
    public function testExtractRawData()
    {
        $parser = new \Smalot\PdfParser\Parser();
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];

        $rawCommands = $firstPage->extractRawData();
        $tmCommand = $rawCommands[1];

        $this->assert->array($rawCommands)->hasSize(172);

        // Shape of the Tm entry, then its operator and operand string.
        $this->assert->array($tmCommand)->hasSize(3);
        $this->assert->array($tmCommand)->hasKeys(['t', 'o', 'c']);
        $this->assert->string($tmCommand['o'])->contains('Tm');
        $this->assert->string($tmCommand['c'])->contains('0.999429 0 0 1 201.96 720.68');
    }
128
129
    /**
     * Checks the structure returned by extractDecodedRawData() for the first
     * page of the text sample document.
     *
     * The result must hold 172 entries. Entry 1 must be a `Tm` command and
     * entry 2 a `TJ` command; each is an array made of exactly the keys 't',
     * 'o' and 'c'. The first three operands of the `TJ` command are then
     * checked individually.
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $parser = new \Smalot\PdfParser\Parser();
        $document = $parser->parseFile($filename);
        $pages = $document->getPages();
        $page = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();

        $this->assert->array($extractedDecodedRawData)->hasSize(172);

        // Tm command: shape, operator and operand string.
        $tmItem = $extractedDecodedRawData[1];
        $this->assert->array($tmItem)->hasSize(3);
        $this->assert->array($tmItem)->hasKeys(['t', 'o', 'c']);
        $this->assert->string($tmItem['o'])->contains('Tm');
        $this->assert->string($tmItem['c'])->contains('0.999429 0 0 1 201.96 720.68');

        // TJ command: validate the shape of the TJ entry itself before
        // looking into its operands.
        $tjItem = $extractedDecodedRawData[2];
        $this->assert->array($tjItem)->hasSize(3);
        $this->assert->array($tjItem)->hasKeys(['t', 'o', 'c']);
        $this->assert->string($tjItem['o'])->contains('TJ');

        // Each decoded TJ operand carries a type marker ('t') and a content ('c').
        $this->assert->string($tjItem['c'][0]['t'])->contains('(');
        $this->assert->string($tjItem['c'][0]['c'])->contains('D');
        $this->assert->string($tjItem['c'][1]['t'])->contains('n');
        $this->assert->string($tjItem['c'][1]['c'])->contains('0.325008');
        $this->assert->string($tjItem['c'][2]['t'])->contains('(');
        $this->assert->string($tjItem['c'][2]['c'])->contains('o');
    }
155
156
    /**
     * Checks the command list returned by getDataCommands() for the first
     * page of the text sample: 166 entries overall, entry 0 is a `Tm` command
     * and entry 1 a `TJ` command, each made of exactly the keys 't', 'o' and
     * 'c'. The first three operands of the `TJ` command are checked as well.
     */
    public function testGetDataCommands()
    {
        $parser = new \Smalot\PdfParser\Parser();
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];

        $commands = $firstPage->getDataCommands();
        $tmCommand = $commands[0];

        $this->assert->array($commands)->hasSize(166);

        // Tm command: shape, operator and operand string.
        $this->assert->array($tmCommand)->hasSize(3);
        $this->assert->array($tmCommand)->hasKeys(['t', 'o', 'c']);
        $this->assert->string($tmCommand['o'])->contains('Tm');
        $this->assert->string($tmCommand['c'])->contains('0.999429 0 0 1 201.96 720.68');

        // TJ command: shape and operator.
        $tjCommand = $commands[1];
        $this->assert->array($tjCommand)->hasSize(3);
        $this->assert->array($tjCommand)->hasKeys(['t', 'o', 'c']);
        $this->assert->string($tjCommand['o'])->contains('TJ');

        // Expected type marker ('t') and content ('c') of the first TJ operands.
        $expectedOperands = [
            0 => ['t' => '(', 'c' => 'D'],
            1 => ['t' => 'n', 'c' => '0.325008'],
            2 => ['t' => '(', 'c' => 'o'],
        ];
        foreach ($expectedOperands as $position => $expected) {
            $this->assert->string($tjCommand['c'][$position]['t'])->contains($expected['t']);
            $this->assert->string($tjCommand['c'][$position]['c'])->contains($expected['c']);
        }
    }
182
183
    /**
     * Checks getDataTm() on the first page of three sample documents.
     *
     * Every inspected entry is a pair: index 0 holds the six values of a text
     * matrix and index 1 the text attached to it. The text sample must yield
     * 81 entries; both filled invoice forms must yield 105 entries located at
     * the same positions and differing only in the text they carry.
     */
    public function testGetDataTm()
    {
        $parser = new \Smalot\PdfParser\Parser();

        // ------------------------------------------------------
        // Document with text.
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];
        $entries = $firstPage->getDataTm();
        $headEntry = $entries[0];

        $this->assert->array($entries)->hasSize(81);
        $this->assert->array($headEntry)->hasSize(2);
        $this->assert->array($headEntry[0])->hasSize(6);

        // Entry index => [expected matrix values, expected text fragment].
        $textExpectations = [
            0 => [
                ['0.999429', '0', '0', '1', '201.96', '720.68'],
                'Document title',
            ],
            2 => [
                ['0.999402', '0', '0', '1', '70.8', '673.64'],
                'Calibri : Lorem ipsum dolor sit amet, consectetur a',
            ],
            80 => [
                ['0.999402', '0', '0', '1', '343.003', '81.44'],
                'nenatis.',
            ],
        ];
        foreach ($textExpectations as $index => $expected) {
            $entry = $entries[$index];
            $this->assert->array($entry[0])->containsValues($expected[0]);
            $this->assert->string($entry[1])->contains($expected[1]);
        }

        // ------------------------------------------------------
        // Two filled forms of the same type: identical matrices, different texts.
        $formMatrices = [
            2 => ['1', '0', '0', '1', '167.3', '894.58'],
            6 => ['1', '0', '0', '1', '681.94', '877.42'],
            8 => ['1', '0', '0', '1', '174.86', '827.14'],
        ];
        $formTexts = [
            __DIR__.'/../../../../../samples/SimpleInvoiceFilledExample1.pdf' => [
                2 => 'MyName  MyLastName',
                6 => '1/1/2020',
                8 => 'Purchase 1',
            ],
            __DIR__.'/../../../../../samples/SimpleInvoiceFilledExample2.pdf' => [
                2 => "Other'sName  Other'sLastName",
                6 => '2/2/2020',
                8 => 'Purchase 2',
            ],
        ];
        foreach ($formTexts as $formPath => $textsByIndex) {
            $formPage = $parser->parseFile($formPath)->getPages()[0];
            $formEntries = $formPage->getDataTm();
            $nameEntry = $formEntries[2];

            $this->assert->array($formEntries)->hasSize(105);
            $this->assert->array($nameEntry)->hasSize(2);
            $this->assert->array($nameEntry[0])->hasSize(6);

            foreach ($textsByIndex as $index => $text) {
                $entry = $formEntries[$index];
                $this->assert->array($entry[0])->containsValues($formMatrices[$index]);
                $this->assert->string($entry[1])->contains($text);
            }
        }
    }
308
309
    /**
     * Checks getTextXY() on the first page of three sample documents.
     *
     * On the text sample, the exact coordinates of the title must return one
     * [matrix, text] pair, the truncated coordinates must return nothing, and
     * the truncated coordinates combined with the extra arguments (1, 1) must
     * return the title again.
     * NOTE(review): the two extra arguments are presumably x/y tolerances —
     * confirm against the implementation of getTextXY().
     *
     * On both filled invoice forms, three positions are queried with the same
     * extra arguments and must return the text specific to each form.
     */
    public function testGetTextXY()
    {
        $parser = new \Smalot\PdfParser\Parser();

        // ------------------------------------------------------
        // Document with text.
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $parser->parseFile($samplePath)->getPages()[0];
        $titleMatrix = ['0.999429', '0', '0', '1', '201.96', '720.68'];

        // Exact coordinates.
        $matches = $firstPage->getTextXY(201.96, 720.68);
        $this->assert->array($matches)->hasSize(1);
        $this->assert->array($matches[0])->hasSize(2);
        $this->assert->array($matches[0][0])->containsValues($titleMatrix);
        $this->assert->string($matches[0][1])->contains('Document title');

        // Truncated coordinates without the extra arguments: nothing found.
        $matches = $firstPage->getTextXY(201, 720);
        $this->assert->array($matches)->hasSize(0);

        // Truncated coordinates with the extra arguments: title found again.
        $matches = $firstPage->getTextXY(201, 720, 1, 1);
        $this->assert->array($matches)->hasSize(1);
        $this->assert->array($matches[0])->hasSize(2);
        $this->assert->array($matches[0][0])->containsValues($titleMatrix);
        $this->assert->string($matches[0][1])->contains('Document title');

        // ------------------------------------------------------
        // Two filled forms of the same type: same positions, different texts.
        $nameMatrix = ['1', '0', '0', '1', '167.3', '894.58'];
        $formTexts = [
            __DIR__.'/../../../../../samples/SimpleInvoiceFilledExample1.pdf' => [
                'name' => 'MyName  MyLastName',
                'date' => '1/1/2020',
                'item' => 'Purchase 1',
            ],
            __DIR__.'/../../../../../samples/SimpleInvoiceFilledExample2.pdf' => [
                'name' => "Other'sName  Other'sLastName",
                'date' => '2/2/2020',
                'item' => 'Purchase 2',
            ],
        ];
        foreach ($formTexts as $formPath => $texts) {
            $formPage = $parser->parseFile($formPath)->getPages()[0];

            $matches = $formPage->getTextXY(167, 894, 1, 1);
            $this->assert->array($matches[0][0])->containsValues($nameMatrix);
            $this->assert->string($matches[0][1])->contains($texts['name']);

            $matches = $formPage->getTextXY(681, 877, 1, 1);
            $this->assert->string($matches[0][1])->contains($texts['date']);

            $matches = $formPage->getTextXY(174, 827, 1, 1);
            $this->assert->string($matches[0][1])->contains($texts['item']);
        }
    }
386
}
387