Completed
Pull Request — master (#296)
by
unknown
02:23
created

Page::testExtractRawData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 16
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 12
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 16
rs 9.8666
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser\Tests\Units;
32
33
use mageekguy\atoum;
34
35
/**
36
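 * Atoum test case "Page": parses the sample PDFs and checks the fonts, text and raw/positioned data exposed by the first page of each document.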
37
 */
38
class Page extends atoum\test
39
{
40
    /**
     * Checks getFonts() on the first page of two sample documents.
     *
     * The text sample must yield a non-empty array of Font objects; the
     * sample without text must yield an empty array. Each lookup is made
     * twice: per the original comments, the first call loads the data and
     * the second one is expected to be answered from the cache.
     */
    public function testGetFonts()
    {
        $pdfParser = new \Smalot\PdfParser\Parser();

        // Document with text.
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        // First call: loads the data.
        $loadedFonts = $firstPage->getFonts();
        $this->assert->array($loadedFonts)->isNotEmpty();
        foreach ($loadedFonts as $loadedFont) {
            $this->assert->object($loadedFont)->isInstanceOf('\Smalot\PdfParser\Font');
        }
        // Second call: uses the cache.
        $cachedFonts = $firstPage->getFonts();
        $this->assert->array($cachedFonts)->isNotEmpty();

        // ------------------------------------------------------
        // Document without text.
        $samplePath = __DIR__.'/../../../../../samples/Document3_pdfcreator_nocompressed.pdf';
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        // First call: loads the data.
        $loadedFonts = $firstPage->getFonts();
        $this->assert->array($loadedFonts)->isEmpty();
        // Second call: uses the cache.
        $cachedFonts = $firstPage->getFonts();
        $this->assert->array($cachedFonts)->isEmpty();
    }
73
74
    /**
     * Checks that getFont() returns a Font instance for both identifiers
     * requested on the first page of the text sample.
     */
    public function testGetFont()
    {
        // Document with text.
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $pdfParser = new \Smalot\PdfParser\Parser();
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        // Lookup by the identifier 'R7'.
        $firstLookup = $firstPage->getFont('R7');
        $this->assert->object($firstLookup)->isInstanceOf('\Smalot\PdfParser\Font');

        // NOTE(review): 'ABC7' is presumably an identifier that is absent from
        // the sample; a Font instance is still expected - confirm intent.
        $secondLookup = $firstPage->getFont('ABC7');
        $this->assert->object($secondLookup)->isInstanceOf('\Smalot\PdfParser\Font');
    }
89
90
    /**
     * Checks that getText() on the first page of the text sample returns a
     * string longer than 150 characters containing the expected fragments
     * (title, filler text and the font names printed in the document).
     */
    public function testGetText()
    {
        // Document with text.
        $samplePath = __DIR__.'/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $pdfParser = new \Smalot\PdfParser\Parser();
        $pageText = $pdfParser->parseFile($samplePath)->getPages()[0]->getText();

        $this->assert->string($pageText)->hasLengthGreaterThan(150);

        // Fragments are checked in the same order as they were listed originally.
        $expectedFragments = [
            'Document title',
            'Lorem ipsum',
            'Calibri',
            'Arial',
            'Times',
            'Courier New',
            'Verdana',
        ];
        foreach ($expectedFragments as $fragment) {
            $this->assert->string($pageText)->contains($fragment);
        }
    }
110
    
111
    /**
     * Checks extractRawData() on the first page of the text sample: the
     * number of extracted entries and the shape/content of the entry at
     * index 1, which is expected to be a 'Tm' command.
     */
    public function testExtractRawData()
    {
        // Document with text.
        $samplePath = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $pdfParser = new \Smalot\PdfParser\Parser();
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        $rawEntries = $firstPage->extractRawData();
        $tmEntry = $rawEntries[1];

        $this->assert->array($rawEntries)->hasSize(172);

        // Each entry carries exactly the keys "t", "o" and "c".
        $this->assert->array($tmEntry)->hasSize(3);
        $this->assert->array($tmEntry)->hasKeys(["t", "o", "c"]);
        $this->assert->string($tmEntry["o"])->contains('Tm');
        $this->assert->string($tmEntry["c"])->contains('0.999429 0 0 1 201.96 720.68');
    }
128
    
129
    /**
     * Checks extractDecodedRawData() on the first page of the text sample.
     *
     * Verifies the number of extracted entries, the shape and content of the
     * 'Tm' entry at index 1, and the shape and decoded content of the 'TJ'
     * entry at index 2, whose "c" value is itself a list of sub-entries
     * with "t" and "c" keys.
     */
    public function testExtractDecodedRawData()
    {
        // Document with text.
        $filename = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $parser   = new \Smalot\PdfParser\Parser();
        $document = $parser->parseFile($filename);
        $pages    = $document->getPages();
        $page     = $pages[0];
        $extractedDecodedRawData = $page->extractDecodedRawData();

        // Entry 1: the 'Tm' command with its operand string.
        $tmItem = $extractedDecodedRawData[1];
        $this->assert->array($extractedDecodedRawData)->hasSize(172);
        $this->assert->array($tmItem)->hasSize(3);
        $this->assert->array($tmItem)->hasKeys(["t", "o", "c"]);
        $this->assert->string($tmItem["o"])->contains('Tm');
        $this->assert->string($tmItem["c"])->contains('0.999429 0 0 1 201.96 720.68');

        // Entry 2: the 'TJ' command. The shape checks must target $tjItem;
        // they previously re-checked $tmItem, leaving this entry unverified.
        $tjItem = $extractedDecodedRawData[2];
        $this->assert->array($tjItem)->hasSize(3);
        $this->assert->array($tjItem)->hasKeys(["t", "o", "c"]);
        $this->assert->string($tjItem["o"])->contains('TJ');

        // Decoded sub-entries of the TJ operand, checked one by one.
        $this->assert->string($tjItem["c"][0]["t"])->contains('(');
        $this->assert->string($tjItem["c"][0]["c"])->contains('D');
        $this->assert->string($tjItem["c"][1]["t"])->contains('n');
        $this->assert->string($tjItem["c"][1]["c"])->contains('0.325008');
        $this->assert->string($tjItem["c"][2]["t"])->contains('(');
        $this->assert->string($tjItem["c"][2]["c"])->contains('o');
    }
155
    
156
    /**
     * Checks getDataCommands() on the first page of the text sample: the
     * number of returned commands, the 'Tm' command at index 0 and the 'TJ'
     * command at index 1 together with its first three sub-entries.
     */
    public function testGetDataCommands()
    {
        // Document with text.
        $samplePath = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $pdfParser = new \Smalot\PdfParser\Parser();
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        $commands = $firstPage->getDataCommands();
        $tmCommand = $commands[0];
        $this->assert->array($commands)->hasSize(166);

        // 'Tm' command: three keys, operator name and operand string.
        $this->assert->array($tmCommand)->hasSize(3);
        $this->assert->array($tmCommand)->hasKeys(["t", "o", "c"]);
        $this->assert->string($tmCommand["o"])->contains('Tm');
        $this->assert->string($tmCommand["c"])->contains('0.999429 0 0 1 201.96 720.68');

        // 'TJ' command: same three keys, with a list of sub-entries under "c".
        $tjCommand = $commands[1];
        $this->assert->array($tjCommand)->hasSize(3);
        $this->assert->array($tjCommand)->hasKeys(["t", "o", "c"]);
        $this->assert->string($tjCommand["o"])->contains('TJ');

        $tjParts = $tjCommand["c"];
        $this->assert->string($tjParts[0]["t"])->contains('(');
        $this->assert->string($tjParts[0]["c"])->contains('D');
        $this->assert->string($tjParts[1]["t"])->contains('n');
        $this->assert->string($tjParts[1]["c"])->contains('0.325008');
        $this->assert->string($tjParts[2]["t"])->contains('(');
        $this->assert->string($tjParts[2]["c"])->contains('o');
    }
182
    
183
    /**
     * Checks getDataTm() on the first page of three sample documents.
     *
     * Each returned entry is expected to be a two-element array: a list of
     * six values at index 0 and a text string at index 1. The text sample
     * is checked at entries 0, 2 and 80; the two filled invoice forms are
     * checked at entries 2, 6 and 8, where they share the same six values
     * and differ only in the text.
     */
    public function testGetDataTm()
    {
        $pdfParser = new \Smalot\PdfParser\Parser();

        // Document with text.
        $samplePath = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $entries = $pdfParser->parseFile($samplePath)->getPages()[0]->getDataTm();
        $entry = $entries[0];
        $this->assert->array($entries)->hasSize(81);
        $this->assert->array($entry)->hasSize(2);
        $this->assert->array($entry[0])->hasSize(6);
        $this->assert->array($entry[0])->containsValues(['0.999429', '0', '0', '1', '201.96', '720.68']);
        $this->assert->string($entry[1])->contains('Document title');

        $entry = $entries[2];
        $this->assert->array($entry[0])->containsValues(['0.999402', '0', '0', '1', '70.8', '673.64']);
        $this->assert->string($entry[1])->contains('Calibri : Lorem ipsum dolor sit amet, consectetur a');

        $entry = $entries[80];
        $this->assert->array($entry[0])->containsValues(['0.999402', '0', '0', '1', '343.003', '81.44']);
        $this->assert->string($entry[1])->contains('nenatis.');

        // ------------------------------------------------------
        // Document is a form
        $samplePath = __DIR__ . '/../../../../../samples/SimpleInvoiceFilledExample1.pdf';
        $entries = $pdfParser->parseFile($samplePath)->getPages()[0]->getDataTm();
        $entry = $entries[2];
        $this->assert->array($entries)->hasSize(105);
        $this->assert->array($entry)->hasSize(2);
        $this->assert->array($entry[0])->hasSize(6);
        $this->assert->array($entry[0])->containsValues(['1', '0', '0', '1', '167.3', '894.58']);
        $this->assert->string($entry[1])->contains('MyName  MyLastName');

        $entry = $entries[6];
        $this->assert->array($entry[0])->containsValues(['1', '0', '0', '1', '681.94', '877.42']);
        $this->assert->string($entry[1])->contains('1/1/2020');

        $entry = $entries[8];
        $this->assert->array($entry[0])->containsValues(['1', '0', '0', '1', '174.86', '827.14']);
        $this->assert->string($entry[1])->contains('Purchase 1');

        // ------------------------------------------------------
        // Document is another form of the same type
        $samplePath = __DIR__ . '/../../../../../samples/SimpleInvoiceFilledExample2.pdf';
        $entries = $pdfParser->parseFile($samplePath)->getPages()[0]->getDataTm();
        $entry = $entries[2];
        $this->assert->array($entries)->hasSize(105);
        $this->assert->array($entry)->hasSize(2);
        $this->assert->array($entry[0])->hasSize(6);
        $this->assert->array($entry[0])->containsValues(['1', '0', '0', '1', '167.3', '894.58']);
        $this->assert->string($entry[1])->contains("Other'sName  Other'sLastName");

        $entry = $entries[6];
        $this->assert->array($entry[0])->containsValues(['1', '0', '0', '1', '681.94', '877.42']);
        $this->assert->string($entry[1])->contains('2/2/2020');

        $entry = $entries[8];
        $this->assert->array($entry[0])->containsValues(['1', '0', '0', '1', '174.86', '827.14']);
        $this->assert->string($entry[1])->contains('Purchase 2');
    }
308
    
309
    
310
    /**
     * Checks getTextXY() on the first page of three sample documents.
     *
     * On the text sample, the exact coordinates return one match, the
     * truncated coordinates return none, and the truncated coordinates
     * with 1, 1 as third and fourth arguments return the same match again.
     * NOTE(review): those extra arguments look like per-axis tolerances -
     * confirm against the getTextXY() signature.
     * On the two filled invoice forms, the same three positions are queried
     * and only the text found there is expected to differ.
     */
    public function testGetTextXY()
    {
        $pdfParser = new \Smalot\PdfParser\Parser();

        // Document with text.
        $samplePath = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        // Exact coordinates: a single match.
        $matches = $firstPage->getTextXY(201.96, 720.68);
        $this->assert->array($matches)->hasSize(1);
        $this->assert->array($matches[0])->hasSize(2);
        $this->assert->array($matches[0][0])->containsValues(['0.999429', '0', '0', '1', '201.96', '720.68']);
        $this->assert->string($matches[0][1])->contains("Document title");

        // Truncated coordinates without the extra arguments: nothing found.
        $matches = $firstPage->getTextXY(201, 720);
        $this->assert->array($matches)->hasSize(0);

        // Truncated coordinates with 1, 1: the same match is found again.
        $matches = $firstPage->getTextXY(201, 720, 1, 1);
        $this->assert->array($matches)->hasSize(1);
        $this->assert->array($matches[0])->hasSize(2);
        $this->assert->array($matches[0][0])->containsValues(['0.999429', '0', '0', '1', '201.96', '720.68']);
        $this->assert->string($matches[0][1])->contains("Document title");

        // ------------------------------------------------------
        // Document is a form
        $samplePath = __DIR__ . '/../../../../../samples/SimpleInvoiceFilledExample1.pdf';
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        $matches = $firstPage->getTextXY(167, 894, 1, 1);
        $this->assert->array($matches[0][0])->containsValues(['1', '0', '0', '1', '167.3', '894.58']);
        $this->assert->string($matches[0][1])->contains('MyName  MyLastName');

        $matches = $firstPage->getTextXY(681, 877, 1, 1);
        $this->assert->string($matches[0][1])->contains('1/1/2020');

        $matches = $firstPage->getTextXY(174, 827, 1, 1);
        $this->assert->string($matches[0][1])->contains('Purchase 1');

        // ------------------------------------------------------
        // Document is another form of the same type
        $samplePath = __DIR__ . '/../../../../../samples/SimpleInvoiceFilledExample2.pdf';
        $firstPage = $pdfParser->parseFile($samplePath)->getPages()[0];

        $matches = $firstPage->getTextXY(167, 894, 1, 1);
        $this->assert->array($matches[0][0])->containsValues(['1', '0', '0', '1', '167.3', '894.58']);
        $this->assert->string($matches[0][1])->contains("Other'sName  Other'sLastName");

        $matches = $firstPage->getTextXY(681, 877, 1, 1);
        $this->assert->string($matches[0][1])->contains('2/2/2020');

        $matches = $firstPage->getTextXY(174, 827, 1, 1);
        $this->assert->string($matches[0][1])->contains('Purchase 2');
    }
387
}
388