RawDataParserHelper - Code Metrics - Inspection of "Fixes failing tests; reorganized test files (PHPUn..." - smalot/pdfparser - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Pull Request — master (#583)

by Konrad

created 2023-03-09 09:15 UTC

RawDataParserHelper A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	8
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	2
dl	0
loc	8
rs	10
c	0
b	0
f	0
wmc	1

1 Method

Rating	Name	Duplication	Size	Complexity
A	exposeGetRawObject()	0	3	1

<?php

/**
 * @file This file is part of the PdfParser library.
 *
 * @author  Konrad Abicht <[email protected]>
 *
 * @date    2020-06-01
 *
 * @author  Sébastien MALOT <[email protected]>
 *
 * @date    2017-01-03
 *
 * @license LGPLv3
 *
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */

namespace PHPUnitTests\Integration\RawData;

use PHPUnitTests\TestCase;
use Smalot\PdfParser\Config;
use Smalot\PdfParser\RawData\RawDataParser;

class RawDataParserHelper extends RawDataParser
{
    /**
     * Public pass-through to the protected "getRawObject" method so that tests can call it directly.
     *
     * Both arguments are forwarded unchanged and the result is handed back as-is.
     *
     * @param mixed $pdfData first argument passed on to getRawObject
     * @param mixed $offset  second argument passed on to getRawObject (0 when omitted)
     *
     * @return mixed whatever getRawObject returns
     */
    public function exposeGetRawObject($pdfData, $offset = 0)
    {
        $rawObject = $this->getRawObject($pdfData, $offset);

        return $rawObject;
    }
}

class RawDataParserTest extends TestCase
{
    /**
     * Stores a RawDataParserHelper, built with an empty array and a newly constructed Config,
     * in $this->fixture.
     */
    protected function setUp(): void
    {
        parent::setUp();

        $config = new Config();
        $this->fixture = new RawDataParserHelper([], $config);
    }

    /**
     * Parses a PDF file located below $this->rootDir using a parser from getParserInstance().
     *
     * @param string $relativePath path that is appended to $this->rootDir (including the leading slash)
     *
     * @return mixed the value returned by parseFile()
     */
    private function parseSamplePdf(string $relativePath)
    {
        return $this->getParserInstance()->parseFile($this->rootDir.$relativePath);
    }

    /**
     * Regression test for getRawObject.
     *
     * A PDF with a corrupted xref table could send getRawObject into an infinite loop.
     *
     * @see https://github.com/smalot/pdfparser/issues/372
     * @see https://github.com/smalot/pdfparser/pull/377
     */
    public function testGetRawObjectIssue372(): void
    {
        // Minimal content that triggers the infinite loop when the fix is missing.
        $rawInput = '<</Producer (eDkºãa˜þõ‚LÅòÕ�PïÙ��)©)>>';

        $expected = [
            '<<',
            [
                ['/', 'Producer', 11],
                ['(', 'eDkºãa˜þõ‚LÅòÕ�PïÙ��', 52],
            ],
            52,
        ];

        // "getRawObject" is reached through "exposeGetRawObject"; without the fix
        // this call would loop forever.
        $actual = $this->fixture->exposeGetRawObject($rawInput);

        $this->assertEquals($expected, $actual);
    }

    /**
     * Regression test for decodeXrefStream.
     *
     * @see https://github.com/smalot/pdfparser/issues/30
     * @see https://github.com/smalot/pdfparser/issues/192
     * @see https://github.com/smalot/pdfparser/issues/209
     * @see https://github.com/smalot/pdfparser/issues/330
     * @see https://github.com/smalot/pdfparser/issues/356
     * @see https://github.com/smalot/pdfparser/issues/373
     * @see https://github.com/smalot/pdfparser/issues/392
     * @see https://github.com/smalot/pdfparser/issues/397
     */
    public function testDecodeXrefStreamIssue356(): void
    {
        $pageList = $this->parseSamplePdf('/samples/bugs/Issue356.pdf')->getPages();
        $firstPageText = $pageList[0]->getText();

        $this->assertStringContainsString('Ημερήσια έκθεση επιδημιολογικής', $firstPageText);
    }

    /**
     * Checks that the first page of the Issue405 sample contains the expected text.
     */
    public function testDecodeObjectHeaderIssue405(): void
    {
        $pageList = $this->parseSamplePdf('/samples/bugs/Issue405.pdf')->getPages();
        $firstPageText = $pageList[0]->getText();

        $this->assertStringContainsString('Bug fix: PR #405', $firstPageText);
    }

    /**
     * Regression test for decodeXrefStream.
     *
     * If a PDF has more than one entry in the /Index area (for example after its
     * document description was changed), only the first entry was used.
     * Without the fix, getDetails() returns an array holding only the 'Pages' key.
     * Entries such as 'Author', 'Creator', 'Title' and 'Subject' (which originate
     * from the 'Info' object) are missing, because the 'Info' object receives a wrong
     * object id while the data is parsed into the xref structure. The object id given
     * at the /Info entry is therefore invalid and the info object cannot be loaded
     * while Document::buildDetails() runs.
     *
     * @see https://github.com/smalot/pdfparser/pull/479
     */
    public function testDecodeXrefStreamIssue479(): void
    {
        $details = $this->parseSamplePdf('/samples/bugs/Issue479.pdf')->getDetails();

        // Checked one by one, in this order.
        $requiredKeys = [
            'Author',
            'CreationDate',
            'Creator',
            'ModDate',
            'Producer',
            'Subject',
            'Title',
        ];

        foreach ($requiredKeys as $requiredKey) {
            $this->assertArrayHasKey($requiredKey, $details);
        }
    }
}


1			<?php
2
3			/**
4			* @file This file is part of the PdfParser library.
5			*
6			* @author Konrad Abicht <[email protected]>
7			*
8			* @date 2020-06-01
9			*
10			* @author Sébastien MALOT <[email protected]>
11			*
12			* @date 2017-01-03
13			*
14			* @license LGPLv3
15			*
16			* @url <https://github.com/smalot/pdfparser>
17			*
18			* PdfParser is a pdf library written in PHP, extraction oriented.
19			* Copyright (C) 2017 - Sébastien MALOT <[email protected]>
20			*
21			* This program is free software: you can redistribute it and/or modify
22			* it under the terms of the GNU Lesser General Public License as published by
23			* the Free Software Foundation, either version 3 of the License, or
24			* (at your option) any later version.
25			*
26			* This program is distributed in the hope that it will be useful,
27			* but WITHOUT ANY WARRANTY; without even the implied warranty of
28			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29			* GNU Lesser General Public License for more details.
30			*
31			* You should have received a copy of the GNU Lesser General Public License
32			* along with this program.
33			* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
34			*/
35
36			namespace PHPUnitTests\Integration\RawData;
37
38			use PHPUnitTests\TestCase;
39			use Smalot\PdfParser\Config;
40			use Smalot\PdfParser\RawData\RawDataParser;
41
42			class RawDataParserHelper extends RawDataParser
43			{
44			/**
45			* Expose protected function "getRawObject".
46			*/
47			public function exposeGetRawObject($pdfData, $offset = 0)
48			{
49			return $this->getRawObject($pdfData, $offset);
50			}
51			}
52
53			class RawDataParserTest extends TestCase
54			{
55			protected function setUp(): void
56			{
57			parent::setUp();
58
59			$this->fixture = new RawDataParserHelper([], new Config());
60			}
61
62			/**
63			* Tests buggy behavior of getRawObject.
64			*
65			* When PDF has corrupted xref table getRawObject may run into an infinite loop.
66			*
67			* @see https://github.com/smalot/pdfparser/issues/372
68			* @see https://github.com/smalot/pdfparser/pull/377
69			*/
70			public function testGetRawObjectIssue372(): void
71			{
72			// The following $data content is a minimal example to trigger the infinite loop
73			$data = '<</Producer (eDkºãa˜þõ‚LÅòÕ�PïÙ��)©)>>';
74
75			// calling "getRawObject" via "exposeGetRawObject" would result in an infinite loop
76			// if the fix is not there.
77			$result = $this->fixture->exposeGetRawObject($data);
78
79			$this->assertEquals(
80			[
81			'<<',
82			[
83			['/', 'Producer', 11],
84			['(', 'eDkºãa˜þõ‚LÅòÕ�PïÙ��', 52],
85			],
86			52,
87			],
88			$result
89			);
90			}
91
92			/**
93			* Tests buggy behavior of decodeXrefStream.
94			*
95			* @see https://github.com/smalot/pdfparser/issues/30
96			* @see https://github.com/smalot/pdfparser/issues/192
97			* @see https://github.com/smalot/pdfparser/issues/209
98			* @see https://github.com/smalot/pdfparser/issues/330
99			* @see https://github.com/smalot/pdfparser/issues/356
100			* @see https://github.com/smalot/pdfparser/issues/373
101			* @see https://github.com/smalot/pdfparser/issues/392
102			* @see https://github.com/smalot/pdfparser/issues/397
103			*/
104			public function testDecodeXrefStreamIssue356(): void
105			{
106			$filename = $this->rootDir.'/samples/bugs/Issue356.pdf';
107
108			$parser = $this->getParserInstance();
109			$document = $parser->parseFile($filename);
110			$pages = $document->getPages();
111
112			$this->assertStringContainsString('Ημερήσια έκθεση επιδημιολογικής', $pages[0]->getText());
113			}
114
115			public function testDecodeObjectHeaderIssue405(): void
116			{
117			$filename = $this->rootDir.'/samples/bugs/Issue405.pdf';
118
119			$parser = $this->getParserInstance();
120			$document = $parser->parseFile($filename);
121			$pages = $document->getPages();
122
123			$this->assertStringContainsString('Bug fix: PR #405', $pages[0]->getText());
124			}
125
126			/**
127			* Tests buggy behavior of decodeXrefStream.
128			*
129			* When PDF has more than one entry in the /Index area (for example by changing
130			* the document description), only the first entry is used.
131			* If the fix is not used the array returned by getDetails() contains only the entry
132			* with the key 'Pages'. All other entries like 'Author', 'Creator', 'Title',
133			* 'Subject' (which come from the 'Info' object) are not listed, because the
134			* 'Info' object gets a wrong object id during parsing the data into the xref structure.
135			* So the object id listed at the /Info entry is not valid and the data of the info object
136			* cannot be loaded during executing Document::buildDetails().
137			*
138			* @see https://github.com/smalot/pdfparser/pull/479
139			*/
140			public function testDecodeXrefStreamIssue479(): void
141			{
142			$filename = $this->rootDir.'/samples/bugs/Issue479.pdf';
143
144			$parser = $this->getParserInstance();
145			$document = $parser->parseFile($filename);
146			$details = $document->getDetails();
147
148			$this->assertArrayHasKey('Author', $details);
149			$this->assertArrayHasKey('CreationDate', $details);
150			$this->assertArrayHasKey('Creator', $details);
151			$this->assertArrayHasKey('ModDate', $details);
152			$this->assertArrayHasKey('Producer', $details);
153			$this->assertArrayHasKey('Subject', $details);
154			$this->assertArrayHasKey('Title', $details);
155			}
156			}
157

smalot / pdfparser

Pull Request — master (#583)

RawDataParserHelper A

Complexity

Size/Duplication

Importance

1 Method

Duplication Side-by-Side

Filter issues like