DocumentIssueFocusTest::testGetTextWithPageLimit() - Code Metrics - Inspection of "Major Update to PDFObject.php + Ancillary" - smalot/pdfparser - Measure and Improve Code Quality continuously with Scrutinizer

Test Failed

Pull Request — master (#634)

unknown

created 2023-09-21 16:48 UTC

DocumentIssueFocusTest::testGetTextWithPageLimit() A

↳ Parent: DocumentIssueFocusTest

Complexity

Conditions	1
Paths	1

Size

Total Lines	6
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	1
eloc	2
c	1
b	0
f	0
nc	1
nop	0
dl	0
loc	6
rs	10

<?php

/**
 * @file This file is part of the PdfParser library.
 *
 * @author  Konrad Abicht <k.abicht@gmail.com>
 *
 * @date    2020-06-01
 *
 * @author  Sébastien MALOT <sebastien@malot.fr>
 *
 * @date    2017-01-03
 *
 * @license LGPLv3
 *
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */

namespace PHPUnitTests\Integration;

use PHPUnitTests\TestCase;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Parser;

/**
 * Integration tests tied to specific reported issues and pull requests.
 *
 * Each test parses one sample PDF below $this->rootDir and checks a single
 * observable result (extracted text or document details).
 */
class DocumentIssueFocusTest extends TestCase
{
    /**
     * Calling getText() with no page limit must yield text that contains
     * the reference phrase.
     *
     * @see https://github.com/smalot/pdfparser/pull/562
     */
    public function testGetTextNoPageLimit(): void
    {
        $parser = new Parser();
        $pdfPath = $this->rootDir.'/samples/bugs/Issue331.pdf';

        $extractedText = $parser->parseFile($pdfPath)->getText();

        self::assertStringContainsString('Medeni Usul ve İcra İflas Hukuku', $extractedText);
    }

    /**
     * Calling getText() with a page limit of 1 must leave out the reference
     * phrase, because that phrase sits on page 2 of the sample.
     *
     * @see https://github.com/smalot/pdfparser/pull/562
     */
    public function testGetTextWithPageLimit(): void
    {
        $parser = new Parser();
        $pdfPath = $this->rootDir.'/samples/bugs/Issue331.pdf';

        // Limit of 1: the phrase on page 2 must not show up in the result.
        $extractedText = $parser->parseFile($pdfPath)->getText(1);

        self::assertStringNotContainsString('Medeni Usul ve İcra İflas Hukuku', $extractedText);
    }

    /**
     * The document details must expose the dc:title value taken from the
     * XMP Metadata (as opposed to getHeader() data).
     *
     * @see https://github.com/smalot/pdfparser/pull/606
     */
    public function testExtractXMPMetadata(): void
    {
        $parser = new Parser();
        $metadata = $parser->parseFile($this->rootDir.'/samples/XMP_Metadata.pdf')->getDetails();

        // Double quotes are required so that \u{2019} becomes a real
        // right single quotation mark in the expected title.
        $expectedTitle = "Enhance PdfParser\u{2019}s Metadata Capabilities";

        self::assertStringContainsString($expectedTitle, $metadata['dc:title']);
    }

    /**
     * Document Properties stored in PDFDocEncoding must be decoded properly.
     *
     * @see https://github.com/smalot/pdfparser/issues/609
     */
    public function testPDFDocEncodingDecode(): void
    {
        $parser = new Parser();
        $metadata = $parser->parseFile($this->rootDir.'/samples/bugs/Issue609.pdf')->getDetails();

        // Keywords, part 1: Adobe-inserted \r must be stripped from the
        // UTF-8 escaped metadata string and the characters around them
        // repaired, so the full glyph run has to appear unbroken.
        $expectedGlyphRun = '˘ˇˆ˙˝˛˞˜•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ﬁﬂŁŒŠŸŽıłœšž€¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
        self::assertStringContainsString($expectedGlyphRun, $metadata['Keywords']);

        // Keywords, part 2: the plain-text sentence must be intact as well.
        $expectedSentence = 'added line-feeds often destroy multibyte characters';
        self::assertStringContainsString($expectedSentence, $metadata['Keywords']);

        // Subject: the PDFDocEncoding characters that differ from CP-1252
        // must map to their correct UTF-8 code points, again with the \r
        // line-feeds removed.
        $expectedSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž';
        self::assertStringContainsString($expectedSubject, $metadata['Subject']);
    }
}


1			<?php
2
3			/**
4			* @file This file is part of the PdfParser library.
5			*
6			* @author Konrad Abicht <[email protected]>
7			*
8			* @date 2020-06-01
9			*
10			* @author Sébastien MALOT <[email protected]>
11			*
12			* @date 2017-01-03
13			*
14			* @license LGPLv3
15			*
16			* @url <https://github.com/smalot/pdfparser>
17			*
18			* PdfParser is a pdf library written in PHP, extraction oriented.
19			* Copyright (C) 2017 - Sébastien MALOT <[email protected]>
20			*
21			* This program is free software: you can redistribute it and/or modify
22			* it under the terms of the GNU Lesser General Public License as published by
23			* the Free Software Foundation, either version 3 of the License, or
24			* (at your option) any later version.
25			*
26			* This program is distributed in the hope that it will be useful,
27			* but WITHOUT ANY WARRANTY; without even the implied warranty of
28			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29			* GNU Lesser General Public License for more details.
30			*
31			* You should have received a copy of the GNU Lesser General Public License
32			* along with this program.
33			* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
34			*/
35
36			namespace PHPUnitTests\Integration;
37
38			use PHPUnitTests\TestCase;
39			use Smalot\PdfParser\Document;
40			use Smalot\PdfParser\Parser;
41
42			/**
43			* Document related tests which are related to certain issues.
44			*/
45			class DocumentIssueFocusTest extends TestCase
46			{
47			/**
48			* Tests getText method without a given page limit.
49			*
50			* @see https://github.com/smalot/pdfparser/pull/562
51			*/
52			public function testGetTextNoPageLimit(): void
53			{
54			$document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue331.pdf');
55
56			self::assertStringContainsString('Medeni Usul ve İcra İflas Hukuku', $document->getText());
57			}
58
59			/**
60			* Tests getText method with a given page limit.
61			*
62			* @see https://github.com/smalot/pdfparser/pull/562
63			*/
64			public function testGetTextWithPageLimit(): void
65			{
66			$document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue331.pdf');
67
68			// given text is on page 2, it has to be ignored because of that
69			self::assertStringNotContainsString('Medeni Usul ve İcra İflas Hukuku', $document->getText(1));
70			}
71
72			/**
73			* Tests extraction of XMP Metadata vs. getHeader() data.
74			*
75			* @see https://github.com/smalot/pdfparser/pull/606
76			*/
77			public function testExtractXMPMetadata(): void
78			{
79			$document = (new Parser())->parseFile($this->rootDir.'/samples/XMP_Metadata.pdf');
80
81			$details = $document->getDetails();
82
83			// Test that the dc:title data was extracted from the XMP
84			// Metadata.
85			self::assertStringContainsString("Enhance PdfParser\u{2019}s Metadata Capabilities", $details['dc:title']);
86			}
87
88			/**
89			* Tests PDFDocEncoding decode of Document Properties
90			*
91			* @see https://github.com/smalot/pdfparser/issues/609
92			*/
93			public function testPDFDocEncodingDecode(): void
94			{
95			$document = (new Parser())->parseFile($this->rootDir.'/samples/bugs/Issue609.pdf');
96
97			$details = $document->getDetails();
98
99			// These test that Adobe-inserted \r are removed from a UTF-8
100			// escaped metadata string, and the surrounding characters are
101			// repaired
102			$testKeywords = '˘ˇˆ˙˝˛˞˜•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ﬁﬂŁŒŠŸŽıłœšž€¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
103			self::assertStringContainsString($testKeywords, $details['Keywords']);
104
105			$testKeywords = 'added line-feeds often destroy multibyte characters';
106			self::assertStringContainsString($testKeywords, $details['Keywords']);
107
108			// This tests that the PDFDocEncoding characters that differ
109			// from CP-1252 are decoded to their correct UTF-8 code points
110			// as well as removing \r line-feeds
111			$testSubject = '•†‡…—–ƒ⁄‹›−‰„“”‘’‚™ŁŒŠŸŽıłœšž';
112			self::assertStringContainsString($testSubject, $details['Subject']);
113			}
114			}
115

smalot / pdfparser

Pull Request — master (#634)

DocumentIssueFocusTest::testGetTextWithPageLimit() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like