Test Failed
Push — feature/pdfcpu ( 14c728 )
by Andreas
09:40
created

PdfcpuWrapper::applyBookmarks()   B

Complexity

Conditions 7
Paths 16

Size

Total Lines 48
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 30
c 1
b 0
f 0
dl 0
loc 48
rs 8.5066
cc 7
nc 16
nop 3
1
<?php
2
/**
3
 * pdfcpu wrapper
4
 *
5
 * @copyright 2014-2024 Institute of Legal Medicine, Medical University of Innsbruck
6
 * @author Andreas Erhard <[email protected]>
7
 * @license LGPL-3.0-only
8
 * @link http://www.gerichtsmedizin.at/
9
 *
10
 * @package pdftk
11
 */
12
13
namespace Gmi\Toolkit\Pdftk;
14
15
use Symfony\Component\Process\Process;
16
17
use Gmi\Toolkit\Pdftk\Exception\FileNotFoundException;
18
use Gmi\Toolkit\Pdftk\Exception\NotImplementedException;
19
use Gmi\Toolkit\Pdftk\Exception\PdfException;
20
use Gmi\Toolkit\Pdftk\Util\Escaper;
21
use Gmi\Toolkit\Pdftk\Util\ProcessFactory;
22
23
use Exception;
24
25
/**
26
 * Wrapper for pdfcpu.
27
 *
28
 * @internal Only the methods exposed by the interfaces should be accessed from outside.
29
 */
30
class PdfcpuWrapper implements WrapperInterface, BinaryPathAwareInterface
31
{
32
    use BinaryPathAwareTrait;
33
34
    private const SUPPORTED_METADATA_ATTRIBUTES = [
35
        'Title', 'Keywords', 'Subject', 'Author', 'Creator', 'Producer', 'CreationDate', 'ModificationDate',
36
    ];
37
38
    /**
39
     * @var ProcessFactory
40
     */
41
    private $processFactory;
42
43
    /**
44
     * @var Escaper
45
     */
46
    private $escaper;
47
48
    /**
     * Constructor.
     *
     * Uses the binary path guessed for the current operating system when no (or an empty) path is given, and a
     * freshly created ProcessFactory when none is injected.
     *
     * @param string|null         $pdftkBinary    Path to the binary to run; null or an empty string triggers
     *                                            guessBinary(). NOTE(review): the parameter is named "pdftk"
     *                                            although this wrapper runs pdfcpu - kept for compatibility.
     * @param ProcessFactory|null $processFactory Factory used to create the processes that run the binary
     *
     * @throws FileNotFoundException
     */
    public function __construct(?string $pdftkBinary = null, ?ProcessFactory $processFactory = null)
    {
        // "?:" is intentional here: an empty string must fall back to the guessed path as well
        $this->setBinary($pdftkBinary ?: $this->guessBinary(PHP_OS));
        $this->processFactory = $processFactory ?? new ProcessFactory();
        $this->escaper = new Escaper();
    }
59
60
    /**
     * Determines the default location of the pdfcpu binary for the given operating system string.
     *
     * Strings whose first three characters are "WIN" (compared case-insensitively) yield the Windows path,
     * everything else yields the Unix-style path.
     */
    public function guessBinary(string $operatingSystemString): string
    {
        $isWindows = 'WIN' === strtoupper(substr($operatingSystemString, 0, 3));

        return $isWindows ? 'C:\\Program Files\\pdfcpu\\pdfcpu.exe' : '/usr/bin/pdfcpu';
    }
73
74
    /**
     * {@inheritDoc}
     *
     * Runs "<binary> merge <outfile> <infile>..." with the output path and every input path passed through
     * the shell escaper.
     *
     * @param string[] $filePaths Paths of the PDF files to merge, in the given order
     * @param string   $outfile   Path of the merged PDF to write
     *
     * @throws PdfException If the process fails; carries the process' error output and output
     */
    public function join(array $filePaths, string $outfile): void
    {
        $esc = $this->escaper;

        $filePathsEscaped = array_map(static function (string $filePath) use ($esc) {
            return $esc->shellArg($filePath);
        }, $filePaths);

        $commandLine = sprintf(
            '%s merge %s %s',
            $this->getBinary(),
            $esc->shellArg($outfile),
            implode(' ', $filePathsEscaped)
        );

        /** @var Process $process */
        $process = $this->processFactory->createProcess($commandLine);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            throw new PdfException($e->getMessage(), 0, $e, $process->getErrorOutput(), $process->getOutput());
        }
    }
102
103
    /**
     * {@inheritDoc}
     *
     * Runs one "<binary> collect -pages <pages> <infile> <target>" process per mapping entry.
     *
     * @param string      $infile       Path of the PDF to split
     * @param array       $mapping      Target filename => list of pages to collect into that file
     * @param string|null $outputFolder Folder that is prepended to every target filename; null or an empty
     *                                  string uses the filenames as they are
     *
     * @throws PdfException If one of the processes fails; targets written before the failure are kept
     */
    public function split(string $infile, array $mapping, ?string $outputFolder = null): void
    {
        $esc = $this->escaper;

        // explicit comparison instead of a truthiness check so that a folder named "0" is honoured
        $hasOutputFolder = null !== $outputFolder && '' !== $outputFolder;

        foreach ($mapping as $filename => $pages) {
            // numeric filenames arrive as integer array keys, normalize them to strings
            $target = $hasOutputFolder
                ? sprintf('%s/%s', $outputFolder, $filename)
                : (string) $filename;

            $commandLine = sprintf(
                '%s collect -pages %s %s %s',
                $this->getBinary(),
                implode(',', $pages),
                $esc->shellArg($infile),
                $esc->shellArg($target)
            );

            $process = $this->processFactory->createProcess($commandLine);

            try {
                $process->mustRun();
            } catch (Exception $e) {
                throw new PdfException($e->getMessage(), 0, $e, $process->getErrorOutput(), $process->getOutput());
            }
        }
    }
134
135
    /**
     * {@inheritDoc}
     *
     * Runs "<binary> collect -pages <order> <infile> <outfile>". When $outfile is null or identical to $infile
     * the result is written to a temporary file which replaces $infile after the process succeeded.
     *
     * @param string      $infile  Path of the PDF whose pages are reordered
     * @param array       $order   Page numbers in the desired order
     * @param string|null $outfile Path of the PDF to write; null or $infile reorders the file in place
     *
     * @throws PdfException If the process fails
     */
    public function reorder(string $infile, array $order, ?string $outfile = null): void
    {
        $replaceInfile = null === $outfile || $infile === $outfile;
        $placeholder = null;

        if ($replaceInfile) {
            // tempnam() creates an empty file to reserve the name; the output goes to a sibling path carrying
            // a ".pdf" extension, so the reserved file itself has to be removed again later on
            $placeholder = tempnam(sys_get_temp_dir(), 'pdf');
            $outfile = $placeholder . '.pdf';
        }

        $esc = $this->escaper;

        $commandLine = sprintf(
            '%s collect -pages %s %s %s',
            $this->getBinary(),
            implode(',', $order),
            $esc->shellArg($infile),
            $esc->shellArg($outfile)
        );

        $process = $this->processFactory->createProcess($commandLine);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            // do not leave a partially written temporary output behind
            if ($replaceInfile && is_file($outfile)) {
                unlink($outfile);
            }

            throw new PdfException(
                sprintf('Failed to reorder PDF "%s"! Error: %s', $infile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        } finally {
            if (is_string($placeholder) && is_file($placeholder)) {
                unlink($placeholder);
            }
        }

        if ($replaceInfile) {
            unlink($infile);
            rename($outfile, $infile);
        }
    }
176
177
    /**
     * {@inheritDoc}
     *
     * Exports the bookmarks to a temporary JSON file and runs
     * "<binary> bookmarks import <infile> <json> <outfile>". When $outfile is null or identical to $infile the
     * result is written to a temporary file which replaces $infile after the process succeeded.
     *
     * @param Bookmarks   $bookmarks Bookmarks to write
     * @param string      $infile    Path of the PDF the bookmarks are applied to
     * @param string|null $outfile   Path of the PDF to write; null or $infile updates the file in place
     *
     * @throws FileNotFoundException If $infile does not exist
     * @throws PdfException          If the process fails
     */
    public function applyBookmarks(Bookmarks $bookmarks, string $infile, ?string $outfile = null): self
    {
        $this->checkPdfFileExists($infile);
        $bookmarksJson = $this->exportBookmarksToJson($bookmarks);

        // tempnam() creates an empty file to reserve the name; the real temporary files are siblings carrying
        // a file extension, so the reserved files have to be removed separately
        $placeholders = [];

        $bookmarksPlaceholder = tempnam(sys_get_temp_dir(), 'bookmarks');
        $placeholders[] = $bookmarksPlaceholder;
        $bookmarksFile = $bookmarksPlaceholder . '.json';
        file_put_contents($bookmarksFile, $bookmarksJson);

        $replaceInfile = null === $outfile || $infile === $outfile;

        if ($replaceInfile) {
            $pdfPlaceholder = tempnam(sys_get_temp_dir(), 'pdf');
            $placeholders[] = $pdfPlaceholder;
            $outfile = $pdfPlaceholder . '.pdf';
        }

        $cmd = sprintf(
            '%s bookmarks import %s %s %s',
            $this->getBinary(),
            $this->escaper->shellArg($infile),
            $this->escaper->shellArg($bookmarksFile),
            $this->escaper->shellArg($outfile)
        );

        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            // do not leave a partially written temporary output behind
            if ($replaceInfile && is_file($outfile)) {
                unlink($outfile);
            }

            throw new PdfException(
                sprintf('Failed to write PDF bookmarks to "%s"! Error: %s', $outfile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        } finally {
            // the bookmarks file and the reserved names are obsolete regardless of the outcome
            foreach (array_merge([$bookmarksFile], $placeholders) as $temporaryFile) {
                if (is_string($temporaryFile) && is_file($temporaryFile)) {
                    unlink($temporaryFile);
                }
            }
        }

        if ($replaceInfile) {
            unlink($infile);
            rename($outfile, $infile);
        }

        return $this;
    }
229
230
    /**
     * {@inheritDoc}
     *
     * Runs "<binary> bookmarks export <infile> <json>" into a temporary JSON file and adds the bookmarks found
     * in that file to $bookmarks. A process failure whose error output contains "no outlines available" is not
     * treated as an error (presumably pdfcpu reports this for PDFs without bookmarks); no bookmarks are added
     * in that case.
     *
     * @param Bookmarks $bookmarks Collection the bookmarks are added to
     * @param string    $infile    Path of the PDF to read the bookmarks from
     *
     * @throws FileNotFoundException If $infile does not exist
     * @throws PdfException          If the process fails for any other reason than missing outlines
     */
    public function importBookmarks(Bookmarks $bookmarks, string $infile): self
    {
        // check first so that no temporary file is created (and left behind) for a missing PDF
        $this->checkPdfFileExists($infile);

        // tempnam() creates an empty file to reserve the name, the export goes to a sibling ".json" path
        $placeholder = tempnam(sys_get_temp_dir(), 'bookmarks');
        $tempBookmarksFile = $placeholder . '.json';

        $cmd = sprintf(
            '%s bookmarks export %s %s',
            $this->getBinary(),
            $this->escaper->shellArg($infile),
            $this->escaper->shellArg($tempBookmarksFile)
        );

        $process = $this->processFactory->createProcess($cmd);

        try {
            try {
                $process->mustRun();
            } catch (Exception $e) {
                if (false === strpos($process->getErrorOutput(), 'no outlines available')) {
                    throw new PdfException(
                        sprintf('Failed to read bookmarks data from "%s"! Error: %s', $infile, $e->getMessage()),
                        0,
                        $e,
                        $process->getErrorOutput(),
                        $process->getOutput()
                    );
                }
            }

            // file_get_contents() returns false (not null) on failure, so "??" would not catch it
            $json = is_readable($tempBookmarksFile) ? file_get_contents($tempBookmarksFile) : false;

            $this->importBookmarksFromJson($bookmarks, false === $json ? '' : $json);
        } finally {
            foreach ([$tempBookmarksFile, $placeholder] as $temporaryFile) {
                if (is_string($temporaryFile) && is_file($temporaryFile)) {
                    unlink($temporaryFile);
                }
            }
        }

        return $this;
    }
271
272
    /**
     * {@inheritDoc}
     *
     * Runs "<binary> info -pages 1- -j <infile>", decodes the JSON output and adds one Page (page number,
     * rotation, width and height taken from the upper right corner of the media box) per entry of
     * "infos[0].pageBoundaries" to $pages, ordered by page number.
     *
     * @param Pages  $pages  Collection the pages are added to
     * @param string $infile Path of the PDF to read the page data from
     *
     * @throws FileNotFoundException If $infile does not exist
     * @throws PdfException          If the process fails or its output contains no usable page data
     */
    public function importPages(Pages $pages, string $infile): self
    {
        $this->checkPdfFileExists($infile);

        $cmd = sprintf('%s info -pages 1- -j %s', $this->getBinary(), $this->escaper->shellArg($infile));

        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            throw new PdfException(
                sprintf('Failed to read pages data from "%s"! Error: %s', $infile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        $output = $process->getOutput();

        /**
         * Remove invalid JSON (useless line with the page numbers at the beginning)
         * @todo Remove when pdfcpu does not emit the extra pages line before JSON anymore
         */
        $outputCleaned = preg_replace('/^pages: (\d,?)+$/mu', '', $output);

        // preg_replace() yields null on a PCRE error, fall back to the raw output in that case
        $infoRaw = json_decode($outputCleaned ?? $output, true);
        $pageBoundaries = $infoRaw['infos'][0]['pageBoundaries'] ?? null;

        if (!is_array($pageBoundaries)) {
            // fail with the wrapper's own exception instead of a TypeError/warning further down
            throw new PdfException(
                sprintf(
                    'Failed to read pages data from "%s"! Error: %s',
                    $infile,
                    'pdfcpu returned no usable page information'
                ),
                0,
                new Exception(json_last_error_msg()),
                $process->getErrorOutput(),
                $output
            );
        }

        // the page numbers in the JSON are strings, not numbers and sorted as strings, ensure natural sort
        ksort($pageBoundaries, SORT_NATURAL);

        foreach ($pageBoundaries as $pageNumber => $pageInfo) {
            $upperRight = $pageInfo['mediaBox']['rect']['ur'] ?? [];

            $page = new Page();

            // missing values end up as 0, exactly like the casts of the undefined entries would
            $page
                ->setPageNumber((int) $pageNumber)
                ->setRotation((int) ($pageInfo['rot'] ?? 0))
                ->setWidth((float) ($upperRight['x'] ?? 0))
                ->setHeight((float) ($upperRight['y'] ?? 0))
            ;

            $pages->add($page);
        }

        return $this;
    }
324
325
    /**
     * {@inheritDoc}
     *
     * Not available in this wrapper: the method always throws.
     *
     * @throws NotImplementedException Always
     */
    public function applyMetadata(Metadata $metadata, string $infile, ?string $outfile = null): self
    {
        throw new NotImplementedException('The current pdfcpu version does not support to set metadata!');
    }
332
333
    /**
     * {@inheritDoc}
     *
     * Runs "<binary> info -j <infile>", decodes the JSON output and copies every supported attribute that has
     * a non-blank value in "infos[0]" to $metadata. The JSON keys are the attribute names with a lower-cased
     * first character; a keyword list is joined with ", " into a single string.
     *
     * @param Metadata $metadata Metadata object the values are set on
     * @param string   $infile   Path of the PDF to read the metadata from
     *
     * @throws PdfException If the process fails
     */
    public function importMetadata(Metadata $metadata, string $infile): self
    {
        $cmd = sprintf('%s info -j %s', $this->getBinary(), $this->escaper->shellArg($infile));

        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            throw new PdfException(
                sprintf('Failed to read metadata data from "%s"! Error: %s', $infile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        $raw = json_decode($process->getOutput(), true);

        // undecodable or unexpectedly shaped output is treated as "no metadata available"
        $metadataArray = $raw['infos'][0] ?? [];
        if (!is_array($metadataArray)) {
            $metadataArray = [];
        }

        // only a list can be joined, a keyword string is used as it is
        if (isset($metadataArray['keywords']) && is_array($metadataArray['keywords'])) {
            $metadataArray['keywords'] = implode(', ', $metadataArray['keywords']);
        }

        foreach (self::SUPPORTED_METADATA_ATTRIBUTES as $attribute) {
            $value = $metadataArray[lcfirst($attribute)] ?? null;

            // skip missing, blank and non-scalar values (trim() must not be called on arrays)
            if (is_scalar($value) && '' !== trim((string) $value)) {
                $metadata->set($attribute, $value);
            }
        }

        return $this;
    }
371
372
    /**
     * Checks whether a PDF file exists.
     *
     * @param string $file Path of the file to check
     *
     * @throws FileNotFoundException If nothing exists at the given path
     */
    private function checkPdfFileExists(string $file): void
    {
        if (!file_exists($file)) {
            throw new FileNotFoundException(sprintf('PDF "%s" not found', $file));
        }
    }
381
382
    /**
     * Decodes a pdfcpu bookmark JSON document and adds the bookmarks it contains to the given collection.
     *
     * A document without a "bookmarks" entry (or one that cannot be decoded) adds nothing.
     */
    private function importBookmarksFromJson(Bookmarks $bookmarks, string $json): self
    {
        $decoded = json_decode($json, true);

        $this->parseBookmarksTree($bookmarks, $decoded['bookmarks'] ?? []);

        return $this;
    }
394
395
    /**
     * Walks the decoded bookmark tree depth-first and adds every node to the collection.
     *
     * A node is added before its "kids"; the kids get the level of their parent plus one.
     */
    private function parseBookmarksTree(Bookmarks $bookmarks, array $arr, int $level = 1)
    {
        foreach ($arr as $node) {
            $entry = new Bookmark();

            $entry
                ->setTitle($node['title'])
                ->setPageNumber($node['page'])
                ->setLevel($level)
            ;

            $bookmarks->add($entry);

            $descendants = $node['kids'] ?? null;

            if (null === $descendants) {
                continue;
            }

            $this->parseBookmarksTree($bookmarks, $descendants, $level + 1);
        }
    }
416
417
    /**
     * Serializes the bookmarks into the content of a pdfcpu bookmark JSON file.
     *
     * The flat bookmark list is first annotated with parent information and then nested into a tree that is
     * encoded (pretty printed) below the "bookmarks" key.
     */
    private function exportBookmarksToJson(Bookmarks $bookmarks): string
    {
        $flatList = $this->buildBookmarksArrayForTree($bookmarks);
        $tree = $this->buildBookmarksTree($flatList);

        $document = ['bookmarks' => $tree];

        return json_encode($document, JSON_PRETTY_PRINT);
    }
426
427
    /**
     * Nests the normalized bookmarks array into a tree, one recursion step per parent id.
     *
     * Every item whose "__parent" is identical to $parentId becomes a node of the returned list; its children
     * (if any) are stored below "kids". All helper entries whose key starts with "__" are stripped from the
     * returned nodes.
     */
    private function buildBookmarksTree(array $bookmarksArray, $parentId = null): array
    {
        $tree = [];

        foreach ($bookmarksArray as $node) {
            if ($node['__parent'] !== $parentId) {
                continue;
            }

            $descendants = $this->buildBookmarksTree($bookmarksArray, $node['__id']);

            if ($descendants) {
                $node['kids'] = $descendants;
            }

            // keep only the entries that are not internal ("__" prefixed) helper entries
            $tree[] = array_filter(
                $node,
                function ($key) {
                    return strpos($key, "__") !== 0;
                },
                ARRAY_FILTER_USE_KEY
            );
        }

        return $tree;
    }
453
454
    /**
     * Builds an array with additional entries prefixed with "__" for level, id and parent id.
     *
     * The id of an entry is its position in the bookmark list. The parent id is determined by comparing the
     * level of a bookmark with the level of its predecessor:
     *  - deeper than the predecessor: the predecessor is the parent
     *  - same level: the parent of the predecessor is kept
     *  - higher up: the closest preceding entry with a lower level is the parent (null if there is none)
     *
     * @return array[] One entry with the keys "title", "page", "__level", "__id" and "__parent" per bookmark
     */
    private function buildBookmarksArrayForTree(Bookmarks $bookmarks): array
    {
        $bookmarksArray = [];

        $b = $bookmarks->all();
        $total = count($b);

        $indexParent = null;

        for ($i = 0; $i < $total; $i++) {
            $bookmark = $b[$i];
            $prevBookmark = $b[$i - 1] ?? null;

            $level = $bookmark->getLevel();

            if ($prevBookmark) {
                $prevLevel = $prevBookmark->getLevel();

                if ($prevLevel < $level) {
                    // bookmark has a higher level (is deeper down) than the previous one
                    $indexParent = $i - 1;
                } elseif ($prevLevel > $level) {
                    // bookmark has a lower level (is higher up) than the previous one
                    $indexParent = $this->getLastParentId($bookmarksArray, $level);
                }
            }

            $bookmarksArray[] = [
                'title' => $bookmark->getTitle(),
                'page' => $bookmark->getPageNumber(),
                '__level' => $level,
                '__id' => $i,
                '__parent' => $indexParent,
            ];
        }

        return $bookmarksArray;
    }
488
489
    /**
     * Searches the normalized bookmarks array backwards for the closest entry above the given level.
     *
     * @return int|null The "__id" of the last entry whose "__level" is lower than $currentLevel, null if there
     *                  is no such entry
     */
    private function getLastParentId(array $bookmarksArray, int $currentLevel): ?int
    {
        $position = count($bookmarksArray);

        while ($position-- > 0) {
            $candidate = $bookmarksArray[$position];

            if ($candidate['__level'] < $currentLevel) {
                return $candidate['__id'];
            }
        }

        return null;
    }
502
}
503