Test Failed
Push — master ( 5c8274...ce434c )
by Konrad
01:59
created

PDFObjectTest::testGetCommandsText()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 100
Code Lines 54

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 54
nc 1
nop 0
dl 0
loc 100
rs 9.0036
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

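Commonly applied refactorings include Extract Method and, when a method carries many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.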

1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 *
8
 * @date    2020-06-01
9
 *
10
 * @author  Sébastien MALOT <[email protected]>
11
 *
12
 * @date    2017-01-03
13
 *
14
 * @license LGPLv3
15
 *
16
 * @url     <https://github.com/smalot/pdfparser>
17
 *
18
 *  PdfParser is a pdf library written in PHP, extraction oriented.
19
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
20
 *
21
 *  This program is free software: you can redistribute it and/or modify
22
 *  it under the terms of the GNU Lesser General Public License as published by
23
 *  the Free Software Foundation, either version 3 of the License, or
24
 *  (at your option) any later version.
25
 *
26
 *  This program is distributed in the hope that it will be useful,
27
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
28
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29
 *  GNU Lesser General Public License for more details.
30
 *
31
 *  You should have received a copy of the GNU Lesser General Public License
32
 *  along with this program.
33
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
34
 */
35
36
namespace PHPUnitTests\Integration;
37
38
use PHPUnitTests\TestCase;
39
use Smalot\PdfParser\Document;
40
use Smalot\PdfParser\PDFObject;
41
42
class PDFObjectTest extends TestCase
43
{
44
    public const TYPE = 't';
45
46
    public const OPERATOR = 'o';
47
48
    public const COMMAND = 'c';
49
50
    /**
     * Creates the PDFObject under test.
     *
     * @param mixed $document forwarded unchanged as the only constructor
     *                        argument of PDFObject
     */
    protected function getPdfObjectInstance($document): PDFObject
    {
        $pdfObject = new PDFObject($document);

        return $pdfObject;
    }
54
55
    /**
     * Checks that getCommandsText() breaks a raw content stream into
     * type / operator / command entries and reports how far it read.
     *
     * The fixture contains literal line breaks, a TJ array mixing literal
     * strings, numeric adjustments and a hex string, escaped parentheses and
     * a NUL byte. The expected list stops after the last Tf command: nothing
     * from "ET Q" onwards is expected in the result, and the offset handed to
     * the call must end up at 172, which is where "ET" starts in the fixture
     * when every line break is a single LF byte.
     *
     * @group linux-only
     */
    public function testGetCommandsText(): void
    {
        // The line breaks inside this literal are part of the fixture.
        $content = "/R14 30 Tf 0.999016 0 0 1 137.4
342.561 Tm
[(A)-168.854( BC D)-220.905(\\(E\\))20.905<20>]
TJ /R14 17.16 Tf <20> Tj
0.999014 0 0 1 336.84 319.161 Tm T* ( \x00m)Tj
/R14 20.04 Tf
ET Q
q -124.774 124.127 5.64213 5.67154 930.307 4436.95 cm
BI";

        $offset = 0;
        $parts = $this->getPdfObjectInstance(new Document())->getCommandsText($content, $offset);

        $expected = [
            [
                self::TYPE => '/',
                self::OPERATOR => 'Tf',
                self::COMMAND => 'R14 30',
            ],
            [
                self::TYPE => '',
                self::OPERATOR => 'Tm',
                self::COMMAND => "0.999016 0 0 1 137.4\n342.561",
            ],
            [
                self::TYPE => '[',
                self::OPERATOR => 'TJ',
                // A TJ array is expected as a nested list of operator-less entries.
                self::COMMAND => [
                    [
                        self::TYPE => '(',
                        self::OPERATOR => '',
                        self::COMMAND => 'A',
                    ],
                    [
                        self::TYPE => 'n',
                        self::OPERATOR => '',
                        self::COMMAND => '-168.854',
                    ],
                    [
                        self::TYPE => '(',
                        self::OPERATOR => '',
                        self::COMMAND => ' BC D',
                    ],
                    [
                        self::TYPE => 'n',
                        self::OPERATOR => '',
                        self::COMMAND => '-220.905',
                    ],
                    [
                        self::TYPE => '(',
                        self::OPERATOR => '',
                        self::COMMAND => '\\(E\\)',
                    ],
                    [
                        self::TYPE => 'n',
                        self::OPERATOR => '',
                        self::COMMAND => '20.905',
                    ],
                    [
                        self::TYPE => '<',
                        self::OPERATOR => '',
                        self::COMMAND => '20',
                    ],
                ],
            ],
            [
                self::TYPE => '/',
                self::OPERATOR => 'Tf',
                self::COMMAND => 'R14 17.16',
            ],
            [
                self::TYPE => '<',
                self::OPERATOR => 'Tj',
                self::COMMAND => '20',
            ],
            [
                self::TYPE => '',
                self::OPERATOR => 'Tm',
                self::COMMAND => '0.999014 0 0 1 336.84 319.161',
            ],
            [
                self::TYPE => '',
                self::OPERATOR => 'T*',
                self::COMMAND => '',
            ],
            [
                self::TYPE => '(',
                self::OPERATOR => 'Tj',
                self::COMMAND => " \x00m",
            ],
            [
                self::TYPE => '/',
                self::OPERATOR => 'Tf',
                self::COMMAND => 'R14 20.04',
            ],
        ];

        // Expected value first, actual second, so PHPUnit labels the failure diff correctly.
        $this->assertEquals($expected, $parts);
        $this->assertEquals(172, $offset);
    }
159
160
    /**
     * Checks that cleanContent() masks parts of a content stream with the
     * given replacement character while keeping the overall layout.
     *
     * In the expected string the dictionary/BDC lines, the EMC marker, the
     * text inside literal strings and the inside of the bracketed array are
     * replaced one-for-one by "_"; operators, numbers and line breaks stay
     * where they were.
     */
    public function testCleanContent(): void
    {
        // The line breaks inside both literals are part of the fixture.
        $content = '/Shape <</MCID << /Font<8>>> BT >>BDC
Q
/CS0 cs 1 1 0  scn
1 i
/GS0 gs
BT
/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0  Ts 100  Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(Modificatio[ns] au \\(14\\) septembre 2009 ET 2010)Tj
EMC
(ABC) Tj

[ (a)-4.5(b)6(c)8.8 ( fsdfsdfsdf[]sd) ] TD

ET
/Shape <</MCID 2 >>BDC
q
0.03 841';

        $expected = '_____________________________________
Q
/CS0 cs 1 1 0  scn
1 i
/GS0 gs
BT
/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0  Ts 100  Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(________________________________________________)Tj
___
(___) Tj

[_____________________________________] TD

ET
______________________
q
0.03 841';

        $cleaned = $this->getPdfObjectInstance(new Document())->cleanContent($content, '_');

        // Expected value first, actual second, so PHPUnit labels the failure diff correctly.
        $this->assertEquals($expected, $cleaned);
    }
204
205
    /**
     * Checks which text sections getSectionsText() pulls out of a content
     * stream.
     *
     * The fixture deliberately places the tokens "BT" and "ET" inside literal
     * strings and inside a dictionary; the expected result still consists of
     * exactly two sections.
     *
     * @group linux-only
     */
    public function testGetSectionText(): void
    {
        // The line breaks inside the literals below are part of the fixture.
        $stream = '/Shape <</MCID 1 >>BDC
Q
/CS0 cs 1 1 0  scn
1 i
/GS0 gs
BT
/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0  Ts 100  Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(Mod BT atio[ns] au \\(14\\) septembre 2009 ET 2010)Tj
EMC
(ABC) Tj

[ (a)-4.5(b) 6(c)8.8 ( fsdfsdfsdf[ sd) ] TD

ET
/Shape <</MCID [BT] >>BDC BT /TT1 1.5 Tf (BT )Tj ET
q
0.03 841';

        $firstSection = '/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0  Ts 100  Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(Mod BT atio[ns] au \(14\) septembre 2009 ET 2010)Tj
EMC
(ABC) Tj

[ (a)-4.5(b) 6(c)8.8 ( fsdfsdfsdf[ sd) ] TD';

        $secondSection = '/TT1 1.5 Tf (BT )Tj
q';

        $pdfObject = $this->getPdfObjectInstance(new Document());
        $actualSections = $pdfObject->getSectionsText($stream);

        $this->assertEquals([$firstSection, $secondSection], $actualSections);
    }
243
244
    /**
     * Parses the sample PDF for issue 398 and checks that the text of its
     * first page contains the expected Hebrew phrase.
     *
     * @see: https://github.com/smalot/pdfparser/issues/398
     */
    public function testReversedChars(): void
    {
        $samplePath = $this->rootDir.'/samples/bugs/Issue398.pdf';

        $parsed = $this->getParserInstance()->parseFile($samplePath);
        $firstPageText = $parsed->getPages()[0]->getText();

        $this->assertStringContainsString('שלומי טסט', $firstPageText);
    }
259
260
    /**
     * Parses the ImproperFontFallback sample and checks that the text of its
     * first page contains the expected Cyrillic word, i.e. that a text stream
     * with a badly chosen font code page is still decoded with a mapping that
     * covers all of its characters.
     *
     * @see: https://github.com/smalot/pdfparser/issues/586
     */
    public function testImproperFontFallback(): void
    {
        $samplePath = $this->rootDir.'/samples/ImproperFontFallback.pdf';

        $parsed = $this->getParserInstance()->parseFile($samplePath);
        $firstPageText = $parsed->getPages()[0]->getText();

        $this->assertStringContainsString('сделал', $firstPageText);
    }
276
277
    /**
     * Tests that a font ID containing a hyphen / dash character is parsed
     * as part of the Tf command instead of splitting it.
     *
     * The first parsed entry is read through the TYPE / OPERATOR / COMMAND
     * constants of this class rather than the bare keys they stand for.
     *
     * @see: https://github.com/smalot/pdfparser/issues/145
     */
    public function testFontIDWithHyphen(): void
    {
        $pdfObject = $this->getPdfObjectInstance(new Document());

        $fontCommandHyphen = $pdfObject->getCommandsText('/FID-01 15.00 Tf');

        $this->assertEquals('/', $fontCommandHyphen[0][self::TYPE]);
        $this->assertEquals('Tf', $fontCommandHyphen[0][self::OPERATOR]);
        $this->assertEquals('FID-01 15.00', $fontCommandHyphen[0][self::COMMAND]);
    }
293
}
294