Passed
Pull Request — master (#435)
by
unknown
02:10
created

DocumentDictionaryCacheTest   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 26
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 14
dl 0
loc 26
rs 10
c 1
b 0
f 0
wmc 4

1 Method

Rating   Name   Duplication   Size   Complexity  
A run() 0 23 4
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 * @date    2020-06-01
8
 *
9
 * @author  Sébastien MALOT <[email protected]>
10
 * @date    2017-01-03
11
 *
12
 * @license LGPLv3
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
namespace Tests\Smalot\PdfParser\Performance;
34
35
use Exception;
36
use Smalot\PdfParser\Element;
37
use Smalot\PdfParser\Encoding;
38
39
/**
40
 * This test runs a performance test with certain PDF files that extensively use
41
 * the getFirstFont() method of Document.php. If Document.php correctly uses a dictionary
42
 * to cache the objects inside the PDF file, then the parsing should be quick.
43
 * If it does not, the parsing can be extremely slow or even crash.
44
 */
45
class DocumentDictionaryCacheTest extends AbstractPerformanceTest
{
    /**
     * Parses a remote sample PDF and times the text extraction of the pages at index 77 and 78.
     *
     * For each of those pages one HTML line is echoed that contains the elapsed
     * extraction time in seconds and the current memory usage in MB.
     *
     * @throws Exception if the PDF file content could not be loaded
     */
    public function run()
    {
        $parser = new \Smalot\PdfParser\Parser();

        // load PDF file content
        $data = file_get_contents('https://comserv.cs.ut.ee/home/files/Shoush_ComputerScience_2020.pdf?study=ATILoputoo&reference=76F6FAFD4C9E6981D9A434D32D2E7EE2AE9C49E7');

        // file_get_contents() returns false on failure; stop here with a clear
        // message instead of handing a boolean to the parser.
        if (false === $data) {
            throw new Exception('Unable to load the PDF file content.');
        }

        // give PDF content to function and parse it
        $pdf = $parser->parseContent($data);

        $pages = $pdf->getPages();

        // only the pages inside this index range (inclusive) are measured
        $firstPageIndex = 77;
        $lastPageIndex = 78;

        foreach ($pages as $i => $page) {
            /** @var \Smalot\PdfParser\Page $page */
            if ($i < $firstPageIndex || $i > $lastPageIndex) {
                continue;
            }

            // The extracted text itself is not needed, only the time it takes to produce it.
            $startTime = microtime(true);
            $page->getText();
            $endTime = microtime(true);

            echo '<b>Page ' . $i . ' (took ' . ($endTime - $startTime) . ' seconds, ' . round(memory_get_usage() / (1000 * 1000), 2) . ' MB RAM)</b><br>';
            echo '<br><br>';
        }
    }
}
75