1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* @file This file is part of the PdfParser library. |
5
|
|
|
* |
6
|
|
|
* @author Konrad Abicht <[email protected]> |
7
|
|
|
* @date 2020-06-01 |
8
|
|
|
* |
9
|
|
|
* @author Sébastien MALOT <[email protected]> |
10
|
|
|
* @date 2017-01-03 |
11
|
|
|
* |
12
|
|
|
* @license LGPLv3 |
13
|
|
|
* @url <https://github.com/smalot/pdfparser> |
14
|
|
|
* |
15
|
|
|
* PdfParser is a pdf library written in PHP, extraction oriented. |
16
|
|
|
* Copyright (C) 2017 - Sébastien MALOT <[email protected]> |
17
|
|
|
* |
18
|
|
|
* This program is free software: you can redistribute it and/or modify |
19
|
|
|
* it under the terms of the GNU Lesser General Public License as published by |
20
|
|
|
* the Free Software Foundation, either version 3 of the License, or |
21
|
|
|
* (at your option) any later version. |
22
|
|
|
* |
23
|
|
|
* This program is distributed in the hope that it will be useful, |
24
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
25
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
26
|
|
|
* GNU Lesser General Public License for more details. |
27
|
|
|
* |
28
|
|
|
* You should have received a copy of the GNU Lesser General Public License |
29
|
|
|
* along with this program. |
30
|
|
|
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>. |
31
|
|
|
*/ |
32
|
|
|
|
33
|
|
|
namespace Tests\Smalot\PdfParser\Performance\Test; |
34
|
|
|
|
35
|
|
|
use Smalot\PdfParser\Page; |
36
|
|
|
use Smalot\PdfParser\Parser; |
37
|
|
|
|
38
|
|
|
/**
 * Performance test for PDF files that make extensive use of the getFirstFont()
 * method of Document.php. If Document.php correctly uses a dictionary to cache
 * the objects inside the PDF file, then the parsing should be quick.
 * If it does not, the parsing can be extremely slow or even crash.
 */
class DocumentDictionaryCacheTest extends AbstractPerformanceTest
{
    /**
     * Lowest key (as yielded by getPages() of the parsed document) of the pages
     * whose text is extracted in run().
     *
     * NOTE(review): the reason for picking exactly this page range is not visible
     * here; presumably these pages trigger many font lookups - confirm.
     */
    private const FIRST_PAGE_KEY = 77;

    /**
     * Highest key (inclusive) of the pages whose text is extracted in run().
     */
    private const LAST_PAGE_KEY = 78;

    /**
     * @var Parser
     */
    protected $parser;

    /**
     * Raw content of the sample PDF file, loaded by init().
     *
     * @var string
     */
    protected $data;

    /**
     * Prepares the test: creates the parser and loads the sample PDF into memory,
     * so that reading the file from disk is not part of run().
     *
     * @throws \RuntimeException if the sample PDF file cannot be read
     */
    public function init(): void
    {
        $this->parser = new Parser();

        // load PDF file content
        $path = __DIR__.'/../../../samples/DocumentWithLotsOfObjects.pdf';
        $content = file_get_contents($path);

        // file_get_contents() returns false on failure; fail loudly here instead of
        // handing a boolean to the parser and getting a misleading error later.
        if (false === $content) {
            throw new \RuntimeException(sprintf('Unable to read sample PDF file "%s".', $path));
        }

        $this->data = $content;
    }

    /**
     * Parses the previously loaded PDF content and extracts the text of the pages
     * whose key lies between FIRST_PAGE_KEY and LAST_PAGE_KEY (both inclusive).
     */
    public function run(): void
    {
        // give PDF content to function and parse it
        $pdf = $this->parser->parseContent($this->data);

        foreach ($pdf->getPages() as $i => $page) {
            /** @var Page $page */
            // Skip every page outside of the range under test.
            if ($i < self::FIRST_PAGE_KEY || $i > self::LAST_PAGE_KEY) {
                continue;
            }

            $page->getText(); // Test this method
        }
    }

    /**
     * Returns the upper bound for the expected duration of run().
     *
     * NOTE(review): the unit is defined by AbstractPerformanceTest (presumably
     * seconds) - confirm against the base class.
     */
    public function getMaxEstimatedTime(): int
    {
        return 20;
    }
}
84
|
|
|
|