Test Failed
Push — pr/257 ( 57d61f )
by Konrad
05:10 queued 13s
created

Document::buildDictionary()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 5
c 0
b 0
f 0
nc 3
nop 0
dl 0
loc 10
rs 10
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <sebastien@malot.fr>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
 * Technical references :
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
 * - http://www.php.net/manual/en/ref.pdf.php#74211
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
 *
 * Class Document
 *
 * Holds the objects of a PDF document (indexed by id), an index of object ids
 * grouped by the content of their 'Type' header entry, the trailer header and
 * the document details (info entries plus page count).
 */
class Document
{
    /**
     * Objects of the document, indexed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Object ids grouped by type: [type => [id => id]].
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Details computed by buildDetails(). Starts as an empty array so that
     * getDetails() honours its documented array return type even before
     * init() has run.
     *
     * @var array
     */
    protected $details = [];

    /**
     * Creates a document with an empty trailer header bound to itself.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the dictionary and the details, then initialises the header of
     * every object followed by the object itself.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose 'Type' content is empty are not indexed.
     */
    protected function buildDictionary()
    {
        // Start from scratch so that ids of previously set objects do not linger.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            if (!empty($type)) {
                $this->dictionary[$type][$id] = $id;
            }
        }
    }

    /**
     * Build details array.
     *
     * The details are taken from the header of the trailer's 'Info' entry
     * (when present) and completed with a 'Pages' entry holding the page
     * count, or 0 when the pages cannot be retrieved.
     */
    protected function buildDetails()
    {
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            // getPages() throws when nothing page-related is indexed.
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array object ids grouped by type: [type => [id => id]]
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the objects of the document and re-runs init().
     *
     * @param PDFObject[] $objects objects indexed by id
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * @param string $id
     *
     * @return PDFObject|Font|Page|Element|null the object, or null when the id is unknown
     */
    public function getObjectById($id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * Returns the objects whose 'Type' header entry equals $type and, when
     * $subtype is given, whose 'Subtype' header entry equals $subtype.
     *
     * @param string $type
     * @param string $subtype
     *
     * @return array matching objects, indexed by id
     */
    public function getObjectsByType($type, $subtype = null)
    {
        $objects = [];

        foreach ($this->objects as $id => $object) {
            // Fetch the header once instead of once per comparison.
            $header = $object->getHeader();

            // Loose comparison is intentional: header entries are objects
            // that are compared against plain strings.
            if ($header->get('Type') != $type) {
                continue;
            }

            if (null !== $subtype && $header->get('Subtype') != $subtype) {
                continue;
            }

            $objects[$id] = $object;
        }

        return $objects;
    }

    /**
     * @return PDFObject[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Returns the pages of the document.
     *
     * Three sources are tried in order: the 'Pages' entry of the first
     * 'Catalog' object, then every object of type 'Pages', and finally the
     * objects of type 'Page' (in which case the order is not guaranteed).
     *
     * @return Page[]
     *
     * @throws \Exception when none of 'Catalog', 'Pages' or 'Page' is indexed
     */
    public function getPages()
    {
        if (isset($this->dictionary['Catalog'])) {
            // Search for catalog to list pages.
            $id = reset($this->dictionary['Catalog']);

            /** @var Pages $object */
            $object = $this->objects[$id]->get('Pages');
            // Fall through to the next strategies when the entry cannot list pages.
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if (isset($this->dictionary['Pages'])) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if (isset($this->dictionary['Page'])) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the text of all pages, separated by a blank line.
     *
     * Pages that are null or whose trimmed text is empty are skipped.
     *
     * @param Page $page ignored: the text of every page is always returned.
     *                   The parameter is kept so existing callers keep working.
     *
     * @return string
     *
     * @throws \Exception when the pages cannot be retrieved (see getPages())
     */
    public function getText(Page $page = null)
    {
        $texts = [];

        // A dedicated loop variable avoids overwriting the $page parameter.
        foreach ($this->getPages() as $currentPage) {
            // In some cases, the page entry may be null.
            if (null === $currentPage) {
                continue;
            }

            $text = trim($currentPage->getText());

            // Compare against '' explicitly: a truthiness check would also
            // discard a page whose whole text is "0".
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details computed by the last init() call.
     *
     * @param bool $deep ignored; kept so existing callers keep working
     *
     * @return array empty until init() has run
     */
    public function getDetails($deep = true)
    {
        return $this->details;
    }
}
288