Issues (2)

src/XmpMetadataExtractor.php (2 issues)

1
<?php
2
3
namespace JeroenDesloovere\XmpMetadataExtractor;
4
5
use DOMDocument;
6
use JeroenDesloovere\XmpMetadataExtractor\Exception\FileNotFoundException;
7
use SplFileInfo;
8
9
/**
 * Extracts an embedded XMP packet (`<ns:xmpmeta>…</ns:xmpmeta>`) from raw
 * content and converts it into a nested PHP array.
 *
 * Conversion rules applied by this class:
 *  - element children are grouped by tag name; a tag that occurs once is
 *    collapsed from a one-item list to the item itself;
 *  - attributes are stored under the '@attributes' key;
 *  - text of an element that also has attributes is stored under '@content';
 *  - rdf:Alt / rdf:Bag / rdf:Seq containers are replaced by their rdf:li items;
 *  - the tag name of the root element is stored under '@root'.
 */
final class XmpMetadataExtractor
{
    private const DEFAULT_NAMESPACE = 'x';
    protected const RDF_ALT = 'rdf:Alt';
    protected const RDF_BAG = 'rdf:Bag';
    protected const RDF_LI = 'rdf:li';
    protected const RDF_SEQ = 'rdf:Seq';
    protected const POSSIBLE_CONTAINERS = [
        self::RDF_ALT,
        self::RDF_BAG,
        self::RDF_SEQ,
    ];

    /**
     * Prefix used to locate the `<prefix:xmpmeta>` wrapper element.
     *
     * @var string
     */
    private $namespace;

    /**
     * @param string $namespace Prefix of the xmpmeta wrapper element.
     */
    public function __construct(string $namespace = self::DEFAULT_NAMESPACE)
    {
        $this->namespace = $namespace;
    }

    /**
     * Converts a single DOM node into its PHP representation.
     *
     * @param \DOMNode $node
     *
     * @return array|string|null Trimmed text for text/CDATA nodes, the result
     *                           of convertXmlNode() for elements, null for any
     *                           other node type.
     */
    private function convertDomNode(\DOMNode $node)
    {
        switch ($node->nodeType) {
            case XML_CDATA_SECTION_NODE:
            case XML_TEXT_NODE:
                return trim($node->textContent);

            case XML_ELEMENT_NODE:
                return $this->convertXmlNode($node);

            default:
                // Any other node type carries no data for the result.
                return null;
        }
    }

    /**
     * Converts an element, its attributes and its children.
     *
     * @param \DOMElement $node
     *
     * @return array|string A string when the element only holds text and has
     *                      no attributes, otherwise an array.
     */
    private function convertXmlNode(\DOMElement $node)
    {
        $output = [];

        foreach ($node->childNodes as $child) {
            $value = $this->convertDomNode($child);

            if ($child instanceof \DOMElement) {
                $tag = $child->tagName;
                if (!isset($output[$tag])) {
                    $output[$tag] = [];
                }
                $output[$tag][] = $value;
            } elseif ($value !== null && $value !== '') {
                // Non-empty text replaces the array, so from here on $output
                // may be a string; the is_array() checks below are not redundant.
                $output = (string) $value;
            }
        }

        $hasAttributes = $node->attributes->length > 0;

        // Has attributes but isn't an array: wrap the text so that the
        // attributes can be stored next to it.
        if ($hasAttributes && !is_array($output)) {
            $output = ['@content' => $output];
        }

        if (!is_array($output)) {
            return $output;
        }

        if ($hasAttributes) {
            $attributes = [];
            foreach ($node->attributes as $attrName => $attrNode) {
                $attributes[$attrName] = (string) $attrNode->value;
            }
            $output['@attributes'] = $attributes;
        }

        // foreach iterates over a copy, so reassigning $output inside the
        // loop does not affect which keys are visited.
        foreach ($output as $tag => $values) {
            // We are combining arrays for rdf:Bag, rdf:Alt, rdf:Seq
            if (in_array($tag, self::POSSIBLE_CONTAINERS, true)) {
                // A container without rdf:li items (empty, or text only)
                // cannot be flattened. NOTE(review): this stops processing the
                // remaining keys as well, as the original code did.
                if (!is_array($values[0]) || !array_key_exists(self::RDF_LI, $values[0])) {
                    break;
                }

                $output = $values[0][self::RDF_LI];
            } elseif (is_array($values) && count($values) === 1 && $tag !== '@attributes') {
                $output[$tag] = $values[0];
            }
        }

        return $output;
    }

    /**
     * Extracts the XMP metadata found in the given raw content.
     *
     * @param string $content Raw content that may embed an XMP packet.
     *
     * @return array The converted metadata including the '@root' key, or an
     *               empty array when no packet is found or it cannot be parsed.
     */
    public function extractFromContent(string $content): array
    {
        $xmp = $this->getXmpXmlString($content);
        if ($xmp === '') {
            // No packet: nothing to parse (an empty string must not reach loadXML()).
            return [];
        }

        // Collect libxml errors internally instead of emitting PHP warnings
        // for malformed packets; the previous setting is restored afterwards.
        $previousSetting = libxml_use_internal_errors(true);

        try {
            $doc = new DOMDocument();
            $loaded = $doc->loadXML($xmp);
            $root = $doc->documentElement;

            if ($loaded === false || $root === null) {
                return [];
            }

            $output = $this->convertDomNode($root);
            if (!is_array($output)) {
                // A root element that only contains text converts to a string.
                $output = ['@content' => $output];
            }
            $output['@root'] = $root->tagName;

            return $output;
        } catch (\Exception $e) {
            return [];
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Reads the given file and extracts its XMP metadata.
     *
     * @param string $file Path of the file to read.
     *
     * @return array See extractFromContent().
     *
     * @throws FileNotFoundException When the file cannot be read.
     */
    public function extractFromFile(string $file): array
    {
        try {
            $fileInfo = new SplFileInfo($file);
            $contents = file_get_contents($fileInfo->getPathname());
        } catch (\Exception $e) {
            // Reached when an error handler converts the warning to an exception.
            $contents = false;
        }

        // file_get_contents() reports failure by returning false.
        if ($contents === false) {
            throw new FileNotFoundException('The given File could not be found.');
        }

        return $this->extractFromContent($contents);
    }

    /**
     * Cuts the `<ns:xmpmeta …>…</ns:xmpmeta>` packet out of the content.
     *
     * @param string $content
     *
     * @return string The packet including both tags, or an empty string when
     *                the opening or the closing tag is missing.
     */
    private function getXmpXmlString(string $content): string
    {
        $openingTag = '<' . $this->namespace . ':xmpmeta';
        $closingTag = '</' . $this->namespace . ':xmpmeta>';

        $start = strpos($content, $openingTag);
        if ($start === false) {
            return '';
        }

        // Only a closing tag after the opening tag can terminate the packet.
        $end = strpos($content, $closingTag, $start);
        if ($end === false) {
            return '';
        }

        return substr($content, $start, $end - $start + strlen($closingTag));
    }
}
135