Test Failed
Push — feature/pdfcpu ( 35f439...859fd3 )
by Andreas
06:56
created

PdfcpuWrapper::applyMetadata()   B

Complexity

Conditions 8
Paths 32

Size

Total Lines 45
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 8

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 26
c 1
b 0
f 0
dl 0
loc 45
ccs 21
cts 21
cp 1
rs 8.4444
cc 8
nc 32
nop 3
crap 8
1
<?php
2
/**
3
 * pdfcpu wrapper
4
 *
5
 * @copyright 2014-2024 Institute of Legal Medicine, Medical University of Innsbruck
6
 * @author Andreas Erhard <[email protected]>
7
 * @license LGPL-3.0-only
8
 * @link http://www.gerichtsmedizin.at/
9
 *
10
 * @package pdftk
11
 */
12
13
namespace Gmi\Toolkit\Pdftk;
14
15
use Symfony\Component\Process\Process;
16
17
use Gmi\Toolkit\Pdftk\Exception\FileNotFoundException;
18
use Gmi\Toolkit\Pdftk\Exception\NotImplementedException;
19
use Gmi\Toolkit\Pdftk\Exception\PdfException;
20
use Gmi\Toolkit\Pdftk\Util\Escaper;
21
use Gmi\Toolkit\Pdftk\Util\ProcessFactory;
22
23
use Exception;
24
25
/**
26
 * Wrapper for pdfcpu.
27
 *
28
 * @internal Only the methods exposed by the interfaces should be accessed from outside.
29
 */
30
class PdfcpuWrapper implements WrapperInterface, BinaryPathAwareInterface
31
{
32
    use BinaryPathAwareTrait;
33
34
    private const SUPPORTED_METADATA_ATTRIBUTES = [
35
        'Title', 'Keywords', 'Subject', 'Author', 'Creator', 'Producer', 'CreationDate', 'ModificationDate',
36
    ];
37
38
    /**
39
     * @var ProcessFactory
40
     */
41
    private $processFactory;
42
43
    /**
44
     * @var Escaper
45
     */
46
    private $escaper;
47
48
    /**
     * Constructor.
     *
     * @param string|null         $pdftkBinary    Path to the binary; when empty, guessBinary(PHP_OS) is used instead.
     * @param ProcessFactory|null $processFactory Factory used to create processes; defaults to a new ProcessFactory.
     *
     * @throws FileNotFoundException presumably raised by setBinary() for an unusable path - confirm in the trait
     */
    public function __construct(?string $pdftkBinary = null, ?ProcessFactory $processFactory = null)
    {
        // "?:" is intentional here: an empty string is treated like "no binary given"
        $this->setBinary($pdftkBinary ?: $this->guessBinary(PHP_OS));
        $this->processFactory = $processFactory ?: new ProcessFactory();
        $this->escaper = new Escaper();
    }
59
60
    /**
     * Returns the default pdfcpu binary location for an operating system name.
     *
     * Names whose first three characters are "WIN" (compared in upper case) yield the Windows path,
     * every other name yields the Unix path.
     */
    public function guessBinary(string $operatingSystemString): string
    {
        $isWindows = strtoupper(substr($operatingSystemString, 0, 3)) === 'WIN';

        return $isWindows ? 'C:\\Program Files\\pdfcpu\\pdfcpu.exe' : '/usr/bin/pdfcpu';
    }
73
74
    /**
     * {@inheritDoc}
     *
     * Runs a single "merge" command: the escaped output file comes first, followed by all escaped input files.
     *
     * @throws PdfException if running the process fails
     */
    public function join(array $filePaths, string $outfile): void
    {
        $escaper = $this->escaper;

        $inputArguments = implode(' ', array_map(function (string $path) use ($escaper) {
            return $escaper->shellArg($path);
        }, $filePaths));

        /**
         * @var Process
         */
        $process = $this->processFactory->createProcess(
            sprintf('%s merge %s %s', $this->getBinary(), $escaper->shellArg($outfile), $inputArguments)
        );

        try {
            $process->mustRun();
        } catch (Exception $failure) {
            throw new PdfException(
                $failure->getMessage(),
                0,
                $failure,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        // the output is fetched but not used
        $process->getOutput();
    }
102
103
    /**
     * {@inheritDoc}
     *
     * Runs one "collect" command per mapping entry. The mapping key is the target filename, the mapping value is
     * the list of pages, which is joined with commas for the "-pages" option.
     *
     * @param string      $infile       PDF to read the pages from.
     * @param array       $mapping      Target filename => list of pages.
     * @param string|null $outputFolder Folder prepended to every target filename; null or "" uses the filename as is.
     *
     * @throws PdfException if running the process fails for any target; later targets are not processed
     */
    public function split(string $infile, array $mapping, ?string $outputFolder = null): void
    {
        // explicit comparison instead of truthiness, so that a folder literally named "0" is still honoured
        $hasOutputFolder = null !== $outputFolder && '' !== $outputFolder;

        // both values are identical for every target, so they are computed once
        $binary = $this->getBinary();
        $infileEscaped = $this->escaper->shellArg($infile);

        foreach ($mapping as $filename => $pages) {
            $target = $hasOutputFolder ? sprintf('%s/%s', $outputFolder, $filename) : $filename;

            // NOTE(review): the page list is inserted unescaped - confirm callers only pass page numbers/ranges
            $commandLine = sprintf(
                '%s collect -pages %s %s %s',
                $binary,
                implode(',', $pages),
                $infileEscaped,
                $this->escaper->shellArg($target)
            );

            $process = $this->processFactory->createProcess($commandLine);

            try {
                $process->mustRun();
            } catch (Exception $e) {
                throw new PdfException($e->getMessage(), 0, $e, $process->getErrorOutput(), $process->getOutput());
            }
        }
    }
134
135
    /**
     * {@inheritDoc}
     *
     * Runs "collect" with the pages in the requested order. When no output file is given (or it equals the input
     * file) the result is written to a temporary file which then replaces the input file.
     *
     * @param string      $infile  PDF to read the pages from.
     * @param array       $order   Pages in the desired order; joined with commas for the "-pages" option.
     * @param string|null $outfile Target file; null or the same path as $infile replaces $infile.
     *
     * @throws PdfException if running the process fails; $infile is left untouched in that case
     */
    public function reorder(string $infile, array $order, ?string $outfile = null): void
    {
        $replaceInfile = false;
        $temporaryBase = null;

        if ($outfile === null || $infile === $outfile) {
            $replaceInfile = true;
            // tempnam() creates an empty placeholder file; the actual output goes to a sibling with ".pdf" appended,
            // so the placeholder has to be removed separately once we are done
            $temporaryBase = tempnam(sys_get_temp_dir(), 'pdf');
            $outfile = $temporaryBase . '.pdf';
        }

        $commandLine = sprintf(
            '%s collect -pages %s %s %s',
            $this->getBinary(),
            implode(',', $order),
            $this->escaper->shellArg($infile),
            $this->escaper->shellArg($outfile)
        );

        $process = $this->processFactory->createProcess($commandLine);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            // do not leave a partially written temporary result behind
            if ($replaceInfile && is_file($outfile)) {
                unlink($outfile);
            }

            throw new PdfException(
                sprintf('Failed to reorder PDF "%s"! Error: %s', $infile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        } finally {
            if (is_string($temporaryBase) && is_file($temporaryBase)) {
                unlink($temporaryBase);
            }
        }

        if ($replaceInfile) {
            unlink($infile);
            rename($outfile, $infile);
        }
    }
176
177
    /**
     * {@inheritDoc}
     *
     * Serializes the bookmarks to a temporary JSON file and runs "bookmarks import" with it. When no output file is
     * given (or it equals the input file) the result is written to a temporary file which then replaces the input.
     *
     * @param Bookmarks   $bookmarks Bookmarks to write.
     * @param string      $infile    PDF to read.
     * @param string|null $outfile   Target file; null or the same path as $infile replaces $infile.
     *
     * @throws FileNotFoundException if $infile does not exist
     * @throws PdfException          if running the process fails; $infile is left untouched in that case
     */
    public function applyBookmarks(Bookmarks $bookmarks, string $infile, ?string $outfile = null): self
    {
        $this->checkPdfFileExists($infile);

        $bookmarksJson = $this->exportBookmarksToJson($bookmarks);

        // tempnam() creates an empty placeholder file; the file actually used is a sibling with a suffix appended,
        // so both paths have to be cleaned up
        $bookmarksBase = tempnam(sys_get_temp_dir(), 'bookmarks');
        $tempfile = $bookmarksBase . '.json';
        file_put_contents($tempfile, $bookmarksJson);

        $replaceInfile = false;
        $outfileBase = null;

        if ($outfile === null || $infile === $outfile) {
            $replaceInfile = true;
            $outfileBase = tempnam(sys_get_temp_dir(), 'pdf');
            $outfile = $outfileBase . '.pdf';
        }

        $cmd = sprintf(
            '%s bookmarks import %s %s %s',
            $this->getBinary(),
            $this->escaper->shellArg($infile),
            $this->escaper->shellArg($tempfile),
            $this->escaper->shellArg($outfile)
        );

        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            // do not leave a partially written temporary result behind
            if ($replaceInfile && is_file($outfile)) {
                unlink($outfile);
            }

            throw new PdfException(
                sprintf('Failed to write PDF bookmarks to "%s"! Error: %s', $outfile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        } finally {
            foreach ([$tempfile, $bookmarksBase, $outfileBase] as $temporaryPath) {
                if (is_string($temporaryPath) && is_file($temporaryPath)) {
                    unlink($temporaryPath);
                }
            }
        }

        if ($replaceInfile) {
            unlink($infile);
            rename($outfile, $infile);
        }

        return $this;
    }
229
230
    /**
     * {@inheritDoc}
     *
     * Runs "bookmarks export" into a temporary JSON file and adds the exported bookmarks to the given collection.
     * A failed run whose error output contains "no outlines available" is treated as "PDF has no bookmarks".
     *
     * @throws FileNotFoundException if $infile does not exist
     * @throws PdfException          if running the process fails for any other reason
     */
    public function importBookmarks(Bookmarks $bookmarks, string $infile): self
    {
        // check first, so that no temporary file is created (and leaked) for a missing input file
        $this->checkPdfFileExists($infile);

        // tempnam() creates an empty placeholder file; pdfcpu writes to a sibling with ".json" appended
        $temporaryBase = tempnam(sys_get_temp_dir(), 'bookmarks');
        $tempBookmarksFile = $temporaryBase . '.json';

        $cmd = sprintf(
            '%s bookmarks export %s %s',
            $this->getBinary(),
            $this->escaper->shellArg($infile),
            $this->escaper->shellArg($tempBookmarksFile)
        );

        $process = $this->processFactory->createProcess($cmd);

        try {
            try {
                $process->mustRun();
            } catch (Exception $e) {
                if (false === strpos($process->getErrorOutput(), 'no outlines available')) {
                    throw new PdfException(
                        sprintf('Failed to read bookmarks data from "%s"! Error: %s', $infile, $e->getMessage()),
                        0,
                        $e,
                        $process->getErrorOutput(),
                        $process->getOutput()
                    );
                }
            }

            // the export file may be missing (e.g. no outlines), an unreadable file is treated as empty JSON
            $this->importBookmarksFromJson($bookmarks, @file_get_contents($tempBookmarksFile) ?: '');
        } finally {
            @unlink($tempBookmarksFile);

            if (is_string($temporaryBase)) {
                @unlink($temporaryBase);
            }
        }

        return $this;
    }
271
272
    /**
     * {@inheritDoc}
     *
     * Runs "info -pages 1- -j" and adds one Page per entry of the reported page boundaries, taking the rotation
     * from "rot" and width/height from the upper right corner of the media box.
     *
     * @throws FileNotFoundException if $infile does not exist
     * @throws PdfException          if running the process fails or its output has no page boundaries
     */
    public function importPages(Pages $pages, string $infile): self
    {
        $this->checkPdfFileExists($infile);

        $cmd = sprintf('%s info -pages 1- -j %s', $this->getBinary(), $this->escaper->shellArg($infile));

        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            throw new PdfException(
                sprintf('Failed to read pages data from "%s"! Error: %s', $infile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        /**
         * Remove invalid JSON (useless line with the page numbers at the beginning)
         * @todo Remove when pdfcpu does not emit the extra pages line before JSON anymore
         */
        $outputCleaned = preg_replace('/^pages: (\d,?)+$/mu', '', $process->getOutput());
        $infoRaw = json_decode((string) $outputCleaned, true);

        $pageBoundaries = $infoRaw['infos'][0]['pageBoundaries'] ?? null;

        // undecodable or unexpectedly shaped output must not reach ksort(), report it as a PDF error instead
        if (!is_array($pageBoundaries)) {
            throw new PdfException(
                sprintf('Failed to read pages data from "%s"! Error: %s', $infile, 'unexpected pdfcpu output'),
                0,
                null,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        // the page numbers in the JSON are strings, not numbers and sorted as strings, ensure natural sort
        ksort($pageBoundaries, SORT_NATURAL);

        foreach ($pageBoundaries as $pageNumber => $pageInfo) {
            $page = new Page();

            $page
                ->setPageNumber((int) $pageNumber)
                ->setRotation((int) $pageInfo['rot'])
                ->setWidth((float) $pageInfo['mediaBox']['rect']['ur']['x'])
                ->setHeight((float) $pageInfo['mediaBox']['rect']['ur']['y'])
            ;

            $pages->add($page);
        }

        return $this;
    }
324
325
    /**
     * {@inheritDoc}
     *
     * Copies the input file to the output file and runs "properties add" on the copy with one "key=value" argument
     * per metadata entry. When no output file is given (or it equals the input file) a temporary copy is used
     * which then replaces the input file.
     *
     * @param Metadata    $metadata Metadata entries to write.
     * @param string      $infile   PDF to read.
     * @param string|null $outfile  Target file; null or the same path as $infile replaces $infile.
     *
     * @throws FileNotFoundException if $infile does not exist
     * @throws PdfException          if running the process fails; $infile is left untouched in that case
     */
    public function applyMetadata(Metadata $metadata, string $infile, ?string $outfile = null): self
    {
        $this->checkPdfFileExists($infile);

        $properties = [];
        foreach ($metadata->all() as $key => $value) {
            // NOTE(review): only the value is escaped, the key is inserted as is - confirm keys are always safe
            $properties[] = sprintf('%s=%s', $key, $this->escaper->shellArg($value));
        }

        $propArgs = implode(' ', $properties);

        $replaceInfile = false;
        $temporaryBase = null;

        if ($outfile === null || $infile === $outfile) {
            $replaceInfile = true;
            // tempnam() creates an empty placeholder file; the working copy is a sibling with ".pdf" appended,
            // so the placeholder has to be removed separately once we are done
            $temporaryBase = tempnam(sys_get_temp_dir(), 'pdf');
            $outfile = $temporaryBase . '.pdf';
        }

        // the command below only receives $outfile, so the properties are added to this copy
        copy($infile, $outfile);

        $cmd = sprintf('%s properties add %s %s', $this->getBinary(), $this->escaper->shellArg($outfile), $propArgs);
        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            // do not leave the temporary working copy behind
            if ($replaceInfile && is_file($outfile)) {
                unlink($outfile);
            }

            throw new PdfException(
                sprintf('Failed to write PDF metadata to "%s"! Error: %s', $outfile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        } finally {
            if (is_string($temporaryBase) && is_file($temporaryBase)) {
                unlink($temporaryBase);
            }
        }

        if ($replaceInfile) {
            unlink($infile);
            rename($outfile, $infile);
        }

        return $this;
    }
374
375 25
    /**
     * {@inheritDoc}
     *
     * Runs "info -j" and copies every supported attribute with a non-blank value from the first "infos" entry into
     * the given metadata object. The JSON keys are the attribute names with a lower-cased first letter; a keywords
     * list is joined with ", ".
     *
     * @throws PdfException if running the process fails
     */
    public function importMetadata(Metadata $metadata, string $infile): self
    {
        $cmd = sprintf('%s info -j %s', $this->getBinary(), $this->escaper->shellArg($infile));

        $process = $this->processFactory->createProcess($cmd);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            throw new PdfException(
                sprintf('Failed to read metadata data from "%s"! Error: %s', $infile, $e->getMessage()),
                0,
                $e,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        $raw = json_decode($process->getOutput(), true);

        // undecodable or unexpectedly shaped output is treated as "no metadata" instead of raising warnings
        $metadataArray = $raw['infos'][0] ?? [];
        if (!is_array($metadataArray)) {
            $metadataArray = [];
        }

        foreach (self::SUPPORTED_METADATA_ATTRIBUTES as $attribute) {
            $attributeNormalized = lcfirst($attribute);

            if (!isset($metadataArray[$attributeNormalized])) {
                continue;
            }

            $value = $metadataArray[$attributeNormalized];

            // keywords are reported as a list; only join when it really is one
            if ('keywords' === $attributeNormalized && is_array($value)) {
                $value = implode(', ', $value);
            }

            if ('' !== trim($value)) {
                $metadata->set($attribute, $value);
            }
        }

        return $this;
    }
413
414
    /**
     * Ensures that the given path exists.
     *
     * @param string $file Path to check with file_exists().
     *
     * @throws FileNotFoundException if the path does not exist
     */
    private function checkPdfFileExists(string $file): void
    {
        if (!file_exists($file)) {
            throw new FileNotFoundException(sprintf('PDF "%s" not found', $file));
        }
    }
423
424 6
    /**
     * Decodes pdfcpu bookmark JSON and adds its "bookmarks" entries to the given collection.
     *
     * A payload without a "bookmarks" key (including undecodable JSON) is handled like an empty bookmark list.
     */
    private function importBookmarksFromJson(Bookmarks $bookmarks, string $json): self
    {
        $decoded = json_decode($json, true);

        $this->parseBookmarksTree($bookmarks, $decoded['bookmarks'] ?? []);

        return $this;
    }
436 4
437 4
    /**
     * Walks the nested bookmark entries depth-first and adds one Bookmark per entry.
     *
     * Each entry provides "title" and "page"; nested entries are read from "kids" and get the next deeper level.
     */
    private function parseBookmarksTree(Bookmarks $bookmarks, array $arr, int $level = 1)
    {
        foreach ($arr as $node) {
            $entry = new Bookmark();

            $entry
                ->setTitle($node['title'])
                ->setPageNumber($node['page'])
                ->setLevel($level)
            ;

            // the parent is added before its children
            $bookmarks->add($entry);

            if (!isset($node['kids'])) {
                continue;
            }

            $this->parseBookmarksTree($bookmarks, $node['kids'], $level + 1);
        }
    }
458
459 6
    /**
     * Serializes the bookmarks to pretty-printed JSON with the nested tree stored under the "bookmarks" key.
     */
    private function exportBookmarksToJson(Bookmarks $bookmarks): string
    {
        $flatEntries = $this->buildBookmarksArrayForTree($bookmarks);
        $tree = $this->buildBookmarksTree($flatEntries);

        return json_encode(['bookmarks' => $tree], JSON_PRETTY_PRINT);
    }
468 4
469
    /**
     * Turns the flat, normalized bookmark list into a nested tree.
     *
     * Returns all entries whose "__parent" is identical to $parentId. Their descendants are attached under
     * "kids" (only when there are any) and all helper keys starting with "__" are removed.
     */
    private function buildBookmarksTree(array $bookmarksArray, $parentId = null): array
    {
        $nodes = [];

        foreach ($bookmarksArray as $entry) {
            if ($entry['__parent'] !== $parentId) {
                continue;
            }

            $kids = $this->buildBookmarksTree($bookmarksArray, $entry['__id']);
            if ($kids) {
                $entry['kids'] = $kids;
            }

            // keep only the public keys, preserving their order
            $nodes[] = array_filter($entry, function ($key) {
                return strpos($key, "__") !== 0;
            }, ARRAY_FILTER_USE_KEY);
        }

        return $nodes;
    }
495 1
496 1
    /**
     * Flattens the bookmarks into a list of arrays carrying "title" and "page" plus the helper keys
     * "__level", "__id" (position in the list) and "__parent" (id of the parent entry or null).
     */
    private function buildBookmarksArrayForTree(Bookmarks $bookmarks): array
    {
        $items = $bookmarks->all();
        $total = count($items);

        $flat = [];
        $parentIndex = null;

        for ($index = 0; $index < $total; $index++) {
            $current = $items[$index];
            $previous = $items[$index - 1] ?? null;
            $currentLevel = $current->getLevel();

            if ($previous) {
                $previousLevel = $previous->getLevel();

                if ($previousLevel < $currentLevel) {
                    // one step deeper: the previous entry becomes the parent
                    $parentIndex = $index - 1;
                } elseif ($previousLevel > $currentLevel) {
                    // back up: look for the closest earlier entry with a lower level
                    $parentIndex = $this->getLastParentId($flat, $currentLevel);
                }
                // same level: the parent stays the same
            }

            $flat[] = [
                'title' => $current->getTitle(),
                'page' => $current->getPageNumber(),
                '__level' => $currentLevel,
                '__id' => $index,
                '__parent' => $parentIndex,
            ];
        }

        return $flat;
    }
531
532
    /**
     * Searches the list from its end for the first entry whose "__level" is lower than the given level.
     *
     * @return int|null The "__id" of that entry or null when there is none.
     */
    private function getLastParentId(array $bookmarksArray, int $currentLevel): ?int
    {
        $position = count($bookmarksArray);

        while ($position-- > 0) {
            $candidate = $bookmarksArray[$position];

            if ($candidate['__level'] < $currentLevel) {
                return $candidate['__id'];
            }
        }

        return null;
    }
545
}
546