DocumentDictionaryCacheTest::testDocumentDictionaryCacheTest() - Code Metrics - Inspection of "Fix for #434. Reworked the Document's object cache..." - smalot/pdfparser - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Pull Request — master (#435)

unknown

created 2021-07-12 12:38 UTC

testDocumentDictionaryCacheTest() A

↳ Parent: DocumentDictionaryCacheTest

Complexity

Conditions	4
Paths	4

Size

Total Lines	23
Code Lines	13

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	4
eloc	13
c	1
b	0
f	0
nc	4
nop	0
dl	0
loc	23
rs	9.8333

<?php

/**
 * @file This file is part of the PdfParser library.
 *
 * @author  Konrad Abicht <[email protected]>
 * @date    2020-06-01
 *
 * @author  Sébastien MALOT <[email protected]>
 * @date    2017-01-03
 *
 * @license LGPLv3
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */

namespace Tests\Smalot\PdfParser\Performance;

use Exception;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Encoding;
use Smalot\PdfParser\Encoding\StandardEncoding;
use Smalot\PdfParser\Header;
use Tests\Smalot\PdfParser\TestCase;

class DocumentDictionaryCacheTest extends TestCase
{
    /**
     * Performance regression test for PDF files that make extensive use of
     * the getFirstFont() method of Document.php.
     *
     * If Document.php correctly uses a dictionary to cache the objects inside
     * the PDF file, extracting the text of the affected pages is quick. If it
     * does not, parsing can be extremely slow or even crash.
     *
     * No time limit is asserted here; a regression is expected to surface as
     * a timeout or crash of the test run. The test only verifies that text
     * extraction of the selected pages completes and yields strings.
     *
     * The PDF is downloaded from a remote server, so the test is skipped when
     * the file cannot be fetched.
     */
    public function testDocumentDictionaryCacheTest()
    {
        $parser = new \Smalot\PdfParser\Parser();

        // Load the PDF file content from the remote server.
        $data = file_get_contents('https://comserv.cs.ut.ee/home/files/Shoush_ComputerScience_2020.pdf?study=ATILoputoo&reference=76F6FAFD4C9E6981D9A434D32D2E7EE2AE9C49E7');

        // file_get_contents() returns false on failure; an unreachable server
        // must not be reported as a parser problem.
        if (false === $data) {
            $this->markTestSkipped('Remote PDF file could not be downloaded.');
        }

        // Give the PDF content to the parser and collect its pages.
        $pdf = $parser->parseContent($data);
        $pages = $pdf->getPages();

        // Counts the pages that were actually exercised, so the test cannot
        // pass silently when the document lacks the expected page indices.
        $checkedPages = 0;

        foreach ($pages as $i => $page) {
            /** @var \Smalot\PdfParser\Page $page */

            // Only the pages at index 77 and 78 are relevant for this test.
            if ($i < 77 || $i > 78) {
                continue;
            }

            $pageText = $page->getText();

            $this->assertTrue(
                is_string($pageText),
                'Text extraction of page '.$i.' did not return a string.'
            );

            ++$checkedPages;
        }

        $this->assertGreaterThan(
            0,
            $checkedPages,
            'The PDF file does not contain the pages at index 77 and 78.'
        );
    }
}


1			<?php
2
3			/**
4			* @file This file is part of the PdfParser library.
5			*
6			* @author Konrad Abicht <[email protected]>
7			* @date 2020-06-01
8			*
9			* @author Sébastien MALOT <[email protected]>
10			* @date 2017-01-03
11			*
12			* @license LGPLv3
13			* @url <https://github.com/smalot/pdfparser>
14			*
15			* PdfParser is a pdf library written in PHP, extraction oriented.
16			* Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17			*
18			* This program is free software: you can redistribute it and/or modify
19			* it under the terms of the GNU Lesser General Public License as published by
20			* the Free Software Foundation, either version 3 of the License, or
21			* (at your option) any later version.
22			*
23			* This program is distributed in the hope that it will be useful,
24			* but WITHOUT ANY WARRANTY; without even the implied warranty of
25			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26			* GNU Lesser General Public License for more details.
27			*
28			* You should have received a copy of the GNU Lesser General Public License
29			* along with this program.
30			* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31			*/
32
33			namespace Tests\Smalot\PdfParser\Performance;
34
35			use Exception;
36			use Smalot\PdfParser\Document;
37			use Smalot\PdfParser\Element;
38			use Smalot\PdfParser\Encoding;
39			use Smalot\PdfParser\Encoding\StandardEncoding;
40			use Smalot\PdfParser\Header;
41			use Tests\Smalot\PdfParser\TestCase;
42
43			class DocumentDictionaryCacheTest extends TestCase
44			{
45			/**
46			* This test runs a performance check with certain PDF files that extensively use
47			* the getFirstFont() method of Document.php. If Document.php correctly uses a dictionary
48			* to cache the objects inside the PDF file, then the parsing should be quick.
49			* If it does not, the parsing can be extremely slow or even crash.
50			*/
51			public function testDocumentDictionaryCacheTest()
52			{
53			$parser = new \Smalot\PdfParser\Parser();
54
55			// load PDF file content
56			$data = file_get_contents('https://comserv.cs.ut.ee/home/files/Shoush_ComputerScience_2020.pdf?study=ATILoputoo&reference=76F6FAFD4C9E6981D9A434D32D2E7EE2AE9C49E7');
57
58			// give PDF content to function and parse it
59			$pdf = $parser->parseContent($data);
60
61			$pages = $pdf->getPages();
62
63			foreach ($pages as $i => $page) { /** @var $page Page */
64			if ($i < 77) continue;
65			if ($i > 78) continue;
66
67			$startTime = microtime(true);
68			$pageText = $page->getText();
69			$endTime = microtime(true);
70
71			echo '<b>Page ' . $i . ' (took ' . ($endTime - $startTime) . ' seconds, ' . round(memory_get_usage() / (1000 * 1000), 2) . ' MB RAM)</b><br>';
72			var_dump($pageText);
			0 ignored issues – show Security Debugging Code introduced 2021-07-12 12:46 UTC by Report Bug Copy Issue Report `var_dump($pageText)` looks like debug code. Are you sure you do not want to remove it? Loading history...
73			echo '<br><br>';
74			}
75			}
76
77			}
78

smalot / pdfparser

Pull Request — master (#435)

testDocumentDictionaryCacheTest() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like