Passed
Pull Request — master (#435)
by
unknown
02:49
created

Document::getFirstFont()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
c 0
b 0
f 0
nc 1
nop 0
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <sebastien@malot.fr>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
34
 * Technical references :
35
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
36
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
37
 * - http://www.php.net/manual/en/ref.pdf.php#74211
38
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
39
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
40
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
41
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
42
 *
43
 * Class Document
44
 */
45
class Document
{
    /**
     * Objects of the document, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Index of the objects by type. Each entry has the shape
     * ['all' => [id => object], 'subtype' => [subtype => [id => object]]].
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Details computed by buildDetails(); stays null until init() has run.
     *
     * @var array|null
     */
    protected $details = null;

    /**
     * Creates a document with an empty trailer header.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then initialises
     * every object (its header first, then the object itself).
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose 'Type' content loosely equals null are left out of the
     * dictionary entirely; objects whose 'Subtype' content loosely equals null
     * are only registered under 'all'.
     */
    protected function buildDictionary()
    {
        // Start from scratch so stale entries from a previous build disappear.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            // Cache objects by type and subtype.
            $type = $object->getHeader()->get('Type')->getContent();

            // Deliberately loose: the exact "no value" representation returned
            // by getContent() is not visible here (NOTE(review): presumably
            // null/false/'' -- confirm before making this strict).
            if (null == $type) {
                continue;
            }

            if (!isset($this->dictionary[$type])) {
                $this->dictionary[$type] = [
                    'all' => [],
                    'subtype' => [],
                ];
            }

            $this->dictionary[$type]['all'][$id] = $object;

            $subtype = $object->getHeader()->get('Subtype')->getContent();

            // Same deliberately loose "no value" test as for the type.
            if (null == $subtype) {
                continue;
            }

            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                $this->dictionary[$type]['subtype'][$subtype] = [];
            }

            $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
        }
    }

    /**
     * Build details array.
     *
     * The details come from the trailer's 'Info' entry when it is usable, and
     * always contain a 'Pages' key holding the page count (0 when the pages
     * cannot be listed).
     */
    protected function buildDetails()
    {
        $details = [];

        // Extract document info.
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');

            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count; a document without any page structure is
        // reported as having zero pages instead of failing.
        try {
            $details['Pages'] = \count($this->getPages());
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * Returns the type/subtype index built by buildDictionary().
     *
     * @return array
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the document objects and re-runs init().
     *
     * @param PDFObject[] $objects objects keyed by id; cast to array before use
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Looks up a single object by its id.
     *
     * @param string $id
     *
     * @return PDFObject|Font|Page|Element|null the object, or null when the id is unknown
     */
    public function getObjectById($id)
    {
        return $this->objects[$id] ?? null;
    }

    /**
     * Tells whether at least one object of the given type (and optional
     * subtype) is registered in the dictionary.
     *
     * @param ?string $subtype
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        return 0 < \count($this->getObjectsByType($type, $subtype));
    }

    /**
     * Returns the objects registered for a type, optionally narrowed to a
     * subtype.
     *
     * @param string      $type
     * @param string|null $subtype null or an empty string means "any subtype"
     *
     * @return array objects keyed by id; empty when the type or subtype is unknown
     */
    public function getObjectsByType($type, $subtype = null)
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        // Deliberately loose so that an empty string behaves like null, as it
        // always has; buildDictionary() never registers such a subtype anyway.
        if (null == $subtype) {
            return $this->dictionary[$type]['all'];
        }

        if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
            return [];
        }

        return $this->dictionary[$type]['subtype'][$subtype];
    }

    /**
     * Returns every object registered under the 'Font' type.
     *
     * @return PDFObject[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Returns the first registered font.
     *
     * @return ?PDFObject the first font, or null when the document has none
     */
    public function getFirstFont(): ?PDFObject
    {
        $fonts = $this->getFonts();

        // reset() yields false on an empty array, which would violate the
        // nullable return type and raise a TypeError.
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }

    /**
     * Lists the pages of the document.
     *
     * Three sources are tried in order: the 'Pages' entry of the first
     * 'Catalog' object, then every 'Pages' object, then the bare 'Page'
     * objects (unordered).
     *
     * @return Page[]
     *
     * @throws \Exception when none of the three sources is available
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // Search for catalog to list pages.
            $catalogues = $this->getObjectsByType('Catalog');
            $catalogue = reset($catalogues);

            /** @var Pages $object */
            $object = $catalogue->get('Pages');

            // The entry may be an object without getPages(); in that case fall
            // through to the next strategies.
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if ($this->hasObjectsByType('Page')) {
            // Search for 'page' (unordered pages); drop the id keys.
            return array_values($this->getObjectsByType('Page'));
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the text of all pages, one block per non-empty page, separated
     * by a blank line.
     *
     * @param Page|null $page unused; kept only so existing callers keep working
     *
     * @return string
     *
     * @throws \Exception when the pages cannot be listed (see getPages())
     */
    public function getText(?Page $page = null)
    {
        $texts = [];

        foreach ($this->getPages() as $currentPage) {
            // In some cases, a page entry may be null.
            if (null === $currentPage) {
                continue;
            }

            $text = trim($currentPage->getText());

            // Compare against '' explicitly: a truthiness test would also
            // discard a page whose whole text is "0".
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    /**
     * Replaces the trailer header. The details are not rebuilt here; they are
     * only recomputed by init().
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details computed during init().
     *
     * @param bool $deep unused; kept only so existing callers keep working
     *
     * @return array|null null when init() has not run yet
     */
    public function getDetails($deep = true)
    {
        return $this->details;
    }
}
325