Passed
Branch master (f7fac8)
by Sebastien
02:47
created

Document::getPages()   A

Complexity

Conditions 6
Paths 9

Size

Total Lines 35
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 1 Features 0
Metric Value
cc 6
eloc 16
c 4
b 1
f 0
nc 9
nop 0
dl 0
loc 35
rs 9.1111
1
<?php
2
3
/**
 * @file
 *          This file is part of the PdfParser library.
 *
 * @author  Sébastien MALOT <[email protected]>
 * @date    2017-01-03
 * @license LGPLv3
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 *
 */
30
31
namespace Smalot\PdfParser;
32
33
use Smalot\PdfParser\Element\ElementDate;
34
35
/**
 * Technical references :
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
 * - http://www.php.net/manual/en/ref.pdf.php#74211
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
 *
 * Class Document
 *
 * Holds the objects of a parsed PDF keyed by their identifier, an index of
 * those objects grouped by the "Type" entry of their header, the trailer
 * header, and a details array (info header details plus the page count).
 *
 * @package Smalot\PdfParser
 */
class Document
{
    /**
     * Objects of the document, keyed by object identifier.
     *
     * @var PDFObject[]
     */
    protected $objects = array();

    /**
     * Index of object identifiers grouped by header type:
     * array(type => array(id => id)).
     *
     * @var array
     */
    protected $dictionary = array();

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Document details; null until buildDetails() has run.
     *
     * @var array|null
     */
    protected $details = null;

    /**
     * Creates an empty document with an empty trailer header bound to it.
     */
    public function __construct()
    {
        $this->trailer = new Header(array(), $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then forwards the
     * init call to every object of the document.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose "Type" content is empty are left out of the index.
     */
    protected function buildDictionary()
    {
        // Start from scratch so stale entries never survive a rebuild.
        $this->dictionary = array();

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            if (!empty($type)) {
                $this->dictionary[$type][$id] = $id;
            }
        }
    }

    /**
     * Build details array.
     *
     * Starts from the details of the trailer's "Info" header (when one is
     * available) and adds a "Pages" entry holding the page count, or 0 when
     * the pages cannot be listed.
     */
    protected function buildDetails()
    {
        $details = array();

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if ($info !== null && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = count($pages);
        } catch (\Exception $e) {
            // getPages() throws when no page source exists; report zero pages.
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * Returns the index of object identifiers grouped by header type.
     *
     * @return array
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the objects of the document and re-runs init().
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = array())
    {
        $this->objects = (array)$objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Looks up a single object by its identifier.
     *
     * @param string $id
     *
     * @return PDFObject|null The object, or null when the identifier is unknown.
     */
    public function getObjectById($id)
    {
        return isset($this->objects[$id]) ? $this->objects[$id] : null;
    }

    /**
     * Returns the objects whose header "Type" (and optionally "Subtype")
     * loosely equals the given value, keyed by object identifier.
     *
     * @param string      $type
     * @param string|null $subtype When null, the subtype is not checked.
     *
     * @return PDFObject[]
     */
    public function getObjectsByType($type, $subtype = null)
    {
        $objects = array();

        foreach ($this->objects as $id => $object) {
            $header = $object->getHeader();

            // Loose comparison is intentional: the header entries are
            // objects that are compared against plain strings here.
            if ($header->get('Type') == $type &&
                (is_null($subtype) || $header->get('Subtype') == $subtype)
            ) {
                $objects[$id] = $object;
            }
        }

        return $objects;
    }

    /**
     * @return PDFObject[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Lists the pages of the document.
     *
     * Sources are tried in order: the "Pages" entry of the first catalog,
     * then every object of type "Pages", then the plain objects of type
     * "Page" (unordered).
     *
     * @return Page[]
     * @throws \Exception When the dictionary holds no Catalog, Pages or Page entry
     *                    (or only a Catalog whose "Pages" entry cannot list pages).
     */
    public function getPages()
    {
        if (isset($this->dictionary['Catalog'])) {
            // Search for catalog to list pages.
            $id = reset($this->dictionary['Catalog']);

            /** @var Pages $object */
            $object = $this->objects[$id]->get('Pages');
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if (isset($this->dictionary['Pages'])) {
            // Search for pages to list kids.
            $pages = array();

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if (isset($this->dictionary['Page'])) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the text of every page, trimmed, with pages separated by a
     * blank line. Pages without any text are skipped.
     *
     * @param Page $page Currently ignored: the text of the whole document is
     *                   always returned. Kept so existing callers keep working.
     *
     * @return string
     * @throws \Exception When the pages cannot be listed (see getPages()).
     */
    public function getText(Page $page = null)
    {
        $texts = array();

        // A dedicated loop variable keeps the $page argument from being
        // silently overwritten.
        foreach ($this->getPages() as $currentPage) {
            // In some cases, the page entry may be null.
            if (is_null($currentPage)) {
                continue;
            }

            $text = trim($currentPage->getText());

            // Compare against '' rather than relying on truthiness, so a
            // page whose whole text is "0" is not dropped.
            if ($text !== '') {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    /**
     * @param Header $trailer
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details array built by buildDetails().
     *
     * @param bool $deep Currently ignored; kept so existing callers keep working.
     *
     * @return array Empty array when the details have not been built yet.
     */
    public function getDetails($deep = true)
    {
        // Honour the documented array return type even before init() ran.
        return $this->details !== null ? $this->details : array();
    }
}
300