Test Failed
Pull Request — master (#583)
by Konrad
04:39 queued 02:37
created

RawDataParserTest   A

Complexity

Total Complexity 5

Size/Duplication

Total Lines 102
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 32
dl 0
loc 102
rs 10
c 0
b 0
f 0
wmc 5

5 Methods

Rating   Name   Duplication   Size   Complexity  
A testDecodeObjectHeaderIssue405() 0 9 1
A testDecodeXrefStreamIssue479() 0 15 1
A setUp() 0 5 1
A testDecodeXrefStreamIssue356() 0 9 1
A testGetRawObjectIssue372() 0 19 1
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 *
8
 * @date    2020-06-01
9
 *
10
 * @author  Sébastien MALOT <[email protected]>
11
 *
12
 * @date    2017-01-03
13
 *
14
 * @license LGPLv3
15
 *
16
 * @url     <https://github.com/smalot/pdfparser>
17
 *
18
 *  PdfParser is a pdf library written in PHP, extraction oriented.
19
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
20
 *
21
 *  This program is free software: you can redistribute it and/or modify
22
 *  it under the terms of the GNU Lesser General Public License as published by
23
 *  the Free Software Foundation, either version 3 of the License, or
24
 *  (at your option) any later version.
25
 *
26
 *  This program is distributed in the hope that it will be useful,
27
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
28
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29
 *  GNU Lesser General Public License for more details.
30
 *
31
 *  You should have received a copy of the GNU Lesser General Public License
32
 *  along with this program.
33
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
34
 */
35
36
namespace PHPUnitTests\Integration\RawData;
37
38
use PHPUnitTests\TestCase;
39
use Smalot\PdfParser\Config;
40
use Smalot\PdfParser\RawData\RawDataParser;
41
42
class RawDataParserHelper extends RawDataParser
{
    /**
     * Public pass-through to the protected "getRawObject" method, so that
     * tests can invoke it directly.
     *
     * @param mixed $pdfData forwarded unchanged as the first argument of getRawObject
     * @param mixed $offset  forwarded unchanged as the second argument of getRawObject (defaults to 0)
     *
     * @return mixed the value returned by getRawObject
     */
    public function exposeGetRawObject($pdfData, $offset = 0)
    {
        $rawObject = $this->getRawObject($pdfData, $offset);

        return $rawObject;
    }
}
52
53
class RawDataParserTest extends TestCase
{
    /**
     * Runs the parent set-up, then stores a RawDataParserHelper (built with an
     * empty array and a fresh Config) as the fixture used by the tests.
     */
    protected function setUp(): void
    {
        parent::setUp();

        $config = new Config();
        $this->fixture = new RawDataParserHelper([], $config);
    }

    /**
     * Regression test for getRawObject.
     *
     * When a PDF has a corrupted xref table, getRawObject may run into an infinite loop.
     *
     * @see https://github.com/smalot/pdfparser/issues/372
     * @see https://github.com/smalot/pdfparser/pull/377
     */
    public function testGetRawObjectIssue372(): void
    {
        // Minimal input that triggers the infinite loop when the fix is missing.
        $data = '<</Producer (eDkºãa˜þõ‚LÅòÕ�PïÙ��)©)>>';

        $expected = [
            '<<',
            [
                ['/', 'Producer', 11],
                ['(', 'eDkºãa˜þõ‚LÅòÕ�PïÙ��', 52],
            ],
            52,
        ];

        // The protected "getRawObject" is reached through the helper's public
        // wrapper; without the fix this call would never return.
        $actual = $this->fixture->exposeGetRawObject($data);

        $this->assertEquals($expected, $actual);
    }

    /**
     * Regression test for decodeXrefStream: the text of the first page of the
     * Issue356 sample must contain the expected Greek phrase.
     *
     * @see https://github.com/smalot/pdfparser/issues/30
     * @see https://github.com/smalot/pdfparser/issues/192
     * @see https://github.com/smalot/pdfparser/issues/209
     * @see https://github.com/smalot/pdfparser/issues/330
     * @see https://github.com/smalot/pdfparser/issues/356
     * @see https://github.com/smalot/pdfparser/issues/373
     * @see https://github.com/smalot/pdfparser/issues/392
     * @see https://github.com/smalot/pdfparser/issues/397
     */
    public function testDecodeXrefStreamIssue356(): void
    {
        $samplePath = $this->rootDir.'/samples/bugs/Issue356.pdf';

        $document = $this->getParserInstance()->parseFile($samplePath);
        $firstPageText = $document->getPages()[0]->getText();

        $this->assertStringContainsString('Ημερήσια έκθεση επιδημιολογικής', $firstPageText);
    }

    /**
     * Parses the Issue405 sample and checks that the text of its first page
     * contains the marker string "Bug fix: PR #405".
     */
    public function testDecodeObjectHeaderIssue405(): void
    {
        $samplePath = $this->rootDir.'/samples/bugs/Issue405.pdf';

        $document = $this->getParserInstance()->parseFile($samplePath);
        $firstPageText = $document->getPages()[0]->getText();

        $this->assertStringContainsString('Bug fix: PR #405', $firstPageText);
    }

    /**
     * Regression test for decodeXrefStream.
     *
     * When a PDF has more than one entry in the /Index area (for example after
     * changing the document description), only the first entry is used.
     * Without the fix, the array returned by getDetails() contains only the
     * 'Pages' key. Entries such as 'Author', 'Creator', 'Title' and 'Subject'
     * (which come from the 'Info' object) are missing, because the 'Info'
     * object gets a wrong object id while the data is parsed into the xref
     * structure. The object id listed at the /Info entry is then not valid and
     * the info object cannot be loaded while Document::buildDetails() runs.
     *
     * @see https://github.com/smalot/pdfparser/pull/479
     */
    public function testDecodeXrefStreamIssue479(): void
    {
        $samplePath = $this->rootDir.'/samples/bugs/Issue479.pdf';

        $details = $this->getParserInstance()->parseFile($samplePath)->getDetails();

        // Each of these keys must be present in the document details.
        $expectedKeys = [
            'Author',
            'CreationDate',
            'Creator',
            'ModDate',
            'Producer',
            'Subject',
            'Title',
        ];

        foreach ($expectedKeys as $key) {
            $this->assertArrayHasKey($key, $details);
        }
    }
}
157