1 | <?php |
||
2 | |||
3 | namespace JeroenDesloovere\XmpMetadataExtractor; |
||
4 | |||
5 | use DOMDocument; |
||
6 | use JeroenDesloovere\XmpMetadataExtractor\Exception\FileNotFoundException; |
||
7 | use SplFileInfo; |
||
8 | |||
/**
 * Extracts the XMP metadata packet embedded in arbitrary (binary) content and
 * converts it into a nested PHP array.
 *
 * The packet is located by searching for the "<{namespace}:xmpmeta" opening tag
 * and the matching "</{namespace}:xmpmeta>" closing tag, parsed with DOMDocument
 * and then flattened: single-occurrence child elements are collapsed to their
 * value and rdf:Alt / rdf:Bag / rdf:Seq containers are replaced by the values of
 * their rdf:li items.
 */
final class XmpMetadataExtractor
{
    private const DEFAULT_NAMESPACE = 'x';
    protected const RDF_ALT = 'rdf:Alt';
    protected const RDF_BAG = 'rdf:Bag';
    protected const RDF_LI = 'rdf:li';
    protected const RDF_SEQ = 'rdf:Seq';
    protected const POSSIBLE_CONTAINERS = [
        self::RDF_ALT,
        self::RDF_BAG,
        self::RDF_SEQ,
    ];

    /**
     * Prefix of the "xmpmeta" wrapper element that is searched for.
     *
     * @var string
     */
    private $namespace;

    /**
     * @param string $namespace Prefix used by the xmpmeta element, e.g. "x" for "<x:xmpmeta>".
     */
    public function __construct(string $namespace = self::DEFAULT_NAMESPACE)
    {
        $this->namespace = $namespace;
    }

    /**
     * Converts a single DOM node into its PHP representation.
     *
     * @param \DOMNode $node
     *
     * @return array|string|null Trimmed text for text/CDATA nodes, the converted
     *                           structure for elements, null for any other node type.
     */
    private function convertDomNode(\DOMNode $node)
    {
        switch ($node->nodeType) {
            case XML_CDATA_SECTION_NODE:
            case XML_TEXT_NODE:
                return trim($node->textContent);
            case XML_ELEMENT_NODE:
                return $this->convertXmlNode($node);
            default:
                return null;
        }
    }

    /**
     * Converts an element and its descendants into an array, or into a plain
     * string when the element only carries text.
     *
     * @param \DOMElement $node
     *
     * @return array|string
     */
    private function convertXmlNode(\DOMElement $node)
    {
        $output = [];

        foreach ($node->childNodes as $child) {
            $value = $this->convertDomNode($child);

            if ($child instanceof \DOMElement) {
                if (!is_array($output)) {
                    // Text was seen before this element; keep it under '@content'
                    // instead of writing an array offset into a string.
                    $output = ['@content' => $output];
                }

                // Group children by tag name; repeated tags accumulate in a list.
                $output[$child->tagName][] = $value;
            } elseif (is_string($value) && $value !== '') {
                // Non-empty text (including '0') becomes the value of this node,
                // replacing anything collected so far.
                $output = $value;
            }
        }

        $hasAttributes = $node->attributes->length > 0;

        // Has attributes but isn't an array: wrap the text so attributes can be attached.
        if ($hasAttributes && !is_array($output)) {
            $output = ['@content' => $output];
        }

        if (!is_array($output)) {
            return $output;
        }

        if ($hasAttributes) {
            $attributes = [];
            foreach ($node->attributes as $attrName => $attrNode) {
                $attributes[$attrName] = (string) $attrNode->value;
            }
            $output['@attributes'] = $attributes;
        }

        // The loop iterates over a snapshot of $output, so $output itself may be
        // reassigned while iterating.
        foreach ($output as $tag => $values) {
            if (in_array($tag, self::POSSIBLE_CONTAINERS, true)) {
                // We are combining arrays for rdf:Bag, rdf:Alt, rdf:Seq.
                // A container without rdf:li items (or with plain text) stops the collapsing.
                if (!is_array($values[0]) || !array_key_exists(self::RDF_LI, $values[0])) {
                    break;
                }

                $output = $values[0][self::RDF_LI];
            } elseif (
                is_array($output)
                && is_array($values)
                && count($values) === 1
                && $tag !== '@attributes'
            ) {
                // A tag that occurs exactly once is collapsed to its single value.
                // The is_array($output) guard covers the case where a container with
                // one item has just turned $output into a string.
                $output[$tag] = $values[0];
            }
        }

        return $output;
    }

    /**
     * Extracts the XMP metadata found in the given content.
     *
     * @param string $content Raw content (e.g. the bytes of an image) that may embed an XMP packet.
     *
     * @return array The converted metadata plus an '@root' key holding the root tag name,
     *               or an empty array when no packet is found or it cannot be parsed.
     */
    public function extractFromContent(string $content): array
    {
        $xml = $this->getXmpXmlString($content);
        if ($xml === '') {
            return [];
        }

        // Collect libxml parse errors internally instead of emitting PHP warnings;
        // the previous setting is restored afterwards.
        $previousSetting = libxml_use_internal_errors(true);

        try {
            $doc = new DOMDocument();

            // LIBXML_NONET: never fetch network resources while parsing untrusted content.
            if ($doc->loadXML($xml, LIBXML_NONET) === false || $doc->documentElement === null) {
                return [];
            }

            $root = $doc->documentElement;
            $output = $this->convertDomNode($root);
            if (!is_array($output)) {
                // A text-only root converts to a string; wrap it so '@root' can be added.
                $output = ['@content' => $output];
            }
            $output['@root'] = $root->tagName;

            return $output;
        } catch (\Exception $e) {
            return [];
        } finally {
            if ($previousSetting === false) {
                // Only discard errors we caused to be buffered ourselves.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Reads the given file and extracts the XMP metadata from its contents.
     *
     * @param string $file Path (or stream URL) readable by file_get_contents().
     *
     * @return array See extractFromContent().
     *
     * @throws FileNotFoundException When the file cannot be read.
     */
    public function extractFromFile(string $file): array
    {
        try {
            $fileInfo = new SplFileInfo($file);
            $contents = file_get_contents($fileInfo->getPathname());
        } catch (\Exception $e) {
            // Only reached when an error handler converts warnings into exceptions.
            throw new FileNotFoundException('The given File could not be found.');
        }

        // file_get_contents() reports failure by returning false, not by throwing.
        if ($contents === false) {
            throw new FileNotFoundException('The given File could not be found.');
        }

        return $this->extractFromContent($contents);
    }

    /**
     * Cuts the XMP packet (opening tag through closing tag) out of the content.
     *
     * @param string $content
     *
     * @return string The packet XML, or an empty string when no complete packet is present.
     */
    private function getXmpXmlString(string $content): string
    {
        $openingTag = '<' . $this->namespace . ':xmpmeta';
        $closingTag = '</' . $this->namespace . ':xmpmeta>';

        $start = strpos($content, $openingTag);
        if ($start === false) {
            return '';
        }

        // Look for the closing tag only after the opening tag.
        $end = strpos($content, $closingTag, $start);
        if ($end === false) {
            return '';
        }

        return substr($content, $start, $end - $start + strlen($closingTag));
    }
}
||
135 |
The `break` statement is not necessary if it is preceded, for example, by a `return` statement.
If you would like to keep this construct to be consistent with other `case` statements, you can safely mark this issue as a false-positive.