Passed
Push — master ( d615bc...4104e2 )
by Jeroen
02:08
created

XmpMetadataExtractor::extractFromContent()   A

Complexity

Conditions 2
Paths 5

Size

Total Lines 13
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 9
nc 5
nop 1
dl 0
loc 13
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace JeroenDesloovere\XmpMetadataExtractor;
4
5
use DOMDocument;
6
use JeroenDesloovere\XmpMetadataExtractor\Exception\FileNotFoundException;
7
use SplFileInfo;
8
9
/**
 * Locates the XMP packet (<x:xmpmeta>…</x:xmpmeta>) inside a file or a raw
 * string and converts it into a nested PHP array.
 */
class XmpMetadataExtractor
{
    protected const RDF_ALT = 'rdf:Alt';
    protected const RDF_BAG = 'rdf:Bag';
    protected const RDF_LI = 'rdf:li';
    protected const RDF_SEQ = 'rdf:Seq';
    protected const POSSIBLE_CONTAINERS = [
        self::RDF_ALT,
        self::RDF_BAG,
        self::RDF_SEQ,
    ];

    /**
     * Recursively converts a DOM node into an array (or a trimmed string for
     * text-only nodes).
     *
     * Child elements are grouped by tag name; a group holding a single child
     * is collapsed to that child. Attributes are stored under '@attributes',
     * and text of an element that also has attributes under '@content'.
     * rdf:Alt / rdf:Bag / rdf:Seq containers are replaced by their rdf:li
     * items.
     *
     * @param \DOMNode $node
     *
     * @return array|string
     */
    protected function convertDomNodeToArray($node)
    {
        $output = [];

        switch ($node->nodeType) {
            case XML_CDATA_SECTION_NODE:
            case XML_TEXT_NODE:
                $output = trim($node->textContent);

                break;
            case XML_ELEMENT_NODE:
                foreach ($node->childNodes as $child) {
                    $value = $this->convertDomNodeToArray($child);

                    if (isset($child->tagName)) {
                        $tagName = $child->tagName;
                        if (!isset($output[$tagName])) {
                            $output[$tagName] = [];
                        }
                        $output[$tagName][] = $value;
                    } elseif ($value || $value === '0') {
                        // Keep the literal string '0', which is falsy in PHP.
                        $output = (string) $value;
                    }
                }

                $hasAttributes = $node->attributes->length > 0;

                // Text content plus attributes: wrap the text so both fit.
                if ($hasAttributes && !is_array($output)) {
                    $output = ['@content' => $output];
                }

                if (is_array($output)) {
                    if ($hasAttributes) {
                        $attributes = [];
                        foreach ($node->attributes as $attrName => $attrNode) {
                            $attributes[$attrName] = (string) $attrNode->value;
                        }
                        $output['@attributes'] = $attributes;
                    }

                    // foreach iterates over a copy, so $output may be
                    // reassigned while looping.
                    foreach ($output as $key => $children) {
                        // We are combining arrays for rdf:Bag, rdf:Alt, rdf:Seq
                        if (in_array((string) $key, self::POSSIBLE_CONTAINERS, true)) {
                            // A container without rdf:li items (empty, or
                            // holding only text) is left untouched.
                            if (!is_array($children[0]) || !array_key_exists(self::RDF_LI, $children[0])) {
                                break;
                            }

                            $output = $children[0][self::RDF_LI];

                            // A single rdf:li collapses to a scalar; stop
                            // before a later key is written into a string.
                            if (!is_array($output)) {
                                break;
                            }
                        } elseif (is_array($children) && count($children) === 1 && $key !== '@attributes') {
                            $output[$key] = $children[0];
                        }
                    }
                }

                break;
        }

        return $output;
    }

    /**
     * Extracts the XMP metadata found in the given raw content.
     *
     * @param string $content Raw bytes that may embed an XMP packet.
     *
     * @return array The converted metadata with the root tag name under
     *               '@root', or an empty array when no packet is present or
     *               it cannot be parsed.
     */
    public function extractFromContent(string $content): array
    {
        $xml = $this->getXmpXmlString($content);

        if ($xml === '') {
            return [];
        }

        // Collect libxml errors internally so a malformed packet is detected
        // through the return value instead of PHP warnings.
        $previousSetting = libxml_use_internal_errors(true);

        try {
            $doc = new DOMDocument();
            $loaded = $doc->loadXML($xml);
            $root = $doc->documentElement;

            if ($loaded === false || $root === null) {
                return [];
            }

            $output = $this->convertDomNodeToArray($root);

            // A root holding only text converts to a string; wrap it so the
            // declared array return type always holds.
            if (!is_array($output)) {
                $output = ['@content' => $output];
            }

            $output['@root'] = $root->tagName;

            return $output;
        } catch (\Exception $e) {
            return [];
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Extracts the XMP metadata from the file at the given path.
     *
     * @param string $file Path of the file to read.
     *
     * @return array See extractFromContent().
     *
     * @throws FileNotFoundException When the file cannot be read.
     */
    public function extractFromFile(string $file): array
    {
        try {
            $fileInfo = new SplFileInfo($file);
            $contents = file_get_contents($fileInfo->getPathname());
        } catch (\Exception $e) {
            // Only reached when an error handler turns warnings into exceptions.
            $contents = false;
        }

        // file_get_contents() signals failure by returning false, not by throwing.
        if ($contents === false) {
            throw new FileNotFoundException('The given File could not be found.');
        }

        return $this->extractFromContent($contents);
    }

    /**
     * Returns the XMP packet, from the opening '<x:xmpmeta' up to and
     * including the closing '</x:xmpmeta>'.
     *
     * @param string $content Raw bytes that may embed an XMP packet.
     *
     * @return string The packet, or an empty string when the opening or the
     *                closing tag is missing.
     */
    protected function getXmpXmlString(string $content): string
    {
        $closingTag = '</x:xmpmeta>';

        $xmpDataStart = strpos($content, '<x:xmpmeta');
        if ($xmpDataStart === false) {
            return '';
        }

        // Search from the opening tag so an earlier stray closing tag is ignored.
        $xmpDataEnd = strpos($content, $closingTag, $xmpDataStart);
        if ($xmpDataEnd === false) {
            return '';
        }

        return substr($content, $xmpDataStart, $xmpDataEnd - $xmpDataStart + strlen($closingTag));
    }
}
117