Test Failed
Pull Request — master (#435)
by
unknown
07:47
created

testDocumentDictionaryCacheTest()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 23
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 13
c 1
b 0
f 0
nc 4
nop 0
dl 0
loc 23
rs 9.8333
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @author  Sébastien MALOT <[email protected]>
10
 * @date    2017-01-03
11
 *
12
 * @license LGPLv3
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Tests\Smalot\PdfParser\Performance;
34
35
use Exception;
36
use Smalot\PdfParser\Document;
37
use Smalot\PdfParser\Element;
38
use Smalot\PdfParser\Encoding;
39
use Smalot\PdfParser\Encoding\StandardEncoding;
40
use Smalot\PdfParser\Header;
41
use Tests\Smalot\PdfParser\TestCase;
42
43
class DocumentDictionaryCacheTest extends TestCase
{
    /**
     * Performance regression test for text extraction on a PDF that makes heavy use of
     * the getFirstFont() method of Document.php.
     *
     * If Document.php correctly uses a dictionary to cache the objects inside the PDF file,
     * extracting the text is quick. If it does not, parsing can be extremely slow or even crash,
     * so reaching the end of this test without a timeout or fatal error is the success criterion.
     *
     * The sample PDF is downloaded from a remote server; when it cannot be fetched the test is
     * skipped rather than failed, because the library itself is not at fault.
     *
     * NOTE(review): relies on the project TestCase providing PHPUnit's markTestSkipped() and
     * assertGreaterThan() - confirm it extends PHPUnit\Framework\TestCase.
     */
    public function testDocumentDictionaryCacheTest()
    {
        $url = 'https://comserv.cs.ut.ee/home/files/Shoush_ComputerScience_2020.pdf'
            .'?study=ATILoputoo&reference=76F6FAFD4C9E6981D9A434D32D2E7EE2AE9C49E7';

        // Only these page keys are extracted, the same range as the original manual check.
        // Presumably the keys returned by getPages() are zero-based list indexes - TODO confirm.
        $firstPage = 77;
        $lastPage = 78;

        // file_get_contents() raises a warning on failure. Capture it locally so an unreachable
        // server leads to a skipped test instead of an error inside the test runner.
        $downloadError = null;
        set_error_handler(function ($severity, $message) use (&$downloadError) {
            $downloadError = $message;

            return true;
        });

        try {
            $data = file_get_contents($url);
        } finally {
            restore_error_handler();
        }

        if (false === $data || '' === $data) {
            $reason = null === $downloadError ? 'empty response' : $downloadError;
            $this->markTestSkipped('Could not download the sample PDF: '.$reason);
        }

        $parser = new \Smalot\PdfParser\Parser();
        $pdf = $parser->parseContent($data);

        $checkedPages = 0;
        foreach ($pdf->getPages() as $i => $page) {
            if ($i < $firstPage || $i > $lastPage) {
                continue;
            }

            // This call is the actual subject of the test: without the object cache it is the
            // step that becomes extremely slow or crashes.
            $page->getText();
            ++$checkedPages;
        }

        // Guard against a silent pass when the document has fewer pages than expected.
        $this->assertGreaterThan(
            0,
            $checkedPages,
            'The sample PDF contains no page in the range '.$firstPage.'-'.$lastPage.'.'
        );
    }
}
78