jeroendesloovere /
xmp-metadata-extractor
| 1 | <?php |
||
| 2 | |||
| 3 | namespace JeroenDesloovere\XmpMetadataExtractor; |
||
| 4 | |||
| 5 | use DOMDocument; |
||
| 6 | use JeroenDesloovere\XmpMetadataExtractor\Exception\FileNotFoundException; |
||
| 7 | use SplFileInfo; |
||
| 8 | |||
/**
 * Locates the XMP packet (<ns:xmpmeta> ... </ns:xmpmeta>) inside raw file
 * content and converts it into a nested PHP array.
 *
 * Conversion rules applied by this class:
 *  - child elements are keyed by their qualified tag name;
 *  - a key holding exactly one value is collapsed to that value;
 *  - element attributes are stored under '@attributes';
 *  - text of an element that also has attributes is stored under '@content';
 *  - rdf:Alt / rdf:Bag / rdf:Seq containers are replaced by their rdf:li items.
 */
final class XmpMetadataExtractor
{
    private const DEFAULT_NAMESPACE = 'x';
    protected const RDF_ALT = 'rdf:Alt';
    protected const RDF_BAG = 'rdf:Bag';
    protected const RDF_LI = 'rdf:li';
    protected const RDF_SEQ = 'rdf:Seq';
    protected const POSSIBLE_CONTAINERS = [
        self::RDF_ALT,
        self::RDF_BAG,
        self::RDF_SEQ,
    ];

    /**
     * Prefix used for the xmpmeta wrapper element (the "x" in "x:xmpmeta").
     *
     * @var string
     */
    private $namespace;

    /**
     * @param string $namespace Prefix of the xmpmeta wrapper element to search for.
     */
    public function __construct(string $namespace = self::DEFAULT_NAMESPACE)
    {
        $this->namespace = $namespace;
    }

    /**
     * Converts a single DOM node into its PHP representation.
     *
     * @param \DOMNode $node
     *
     * @return array|string|null Trimmed text for text/CDATA nodes, the converted
     *                           structure for element nodes, null for any other
     *                           node type.
     */
    private function convertDomNode(\DOMNode $node)
    {
        switch ($node->nodeType) {
            case XML_CDATA_SECTION_NODE:
            case XML_TEXT_NODE:
                return trim($node->textContent);

            case XML_ELEMENT_NODE:
                return $this->convertXmlNode($node);

            default:
                // Comments, processing instructions, ... carry no metadata.
                return null;
        }
    }

    /**
     * Converts an element node, its attributes and its children.
     *
     * @param \DOMElement $node
     *
     * @return array|string A string when the element only holds text and has
     *                      no attributes, otherwise an array.
     */
    private function convertXmlNode(\DOMElement $node)
    {
        $output = [];

        foreach ($node->childNodes as $child) {
            $value = $this->convertDomNode($child);

            if ($child instanceof \DOMElement) {
                $tagName = $child->tagName;
                if (!isset($output[$tagName])) {
                    $output[$tagName] = [];
                }
                $output[$tagName][] = $value;
            } elseif ($value || $value === '0') {
                // Non-empty text; '0' is falsy in PHP but is valid content.
                $output = (string) $value;
            }
        }

        $hasAttributes = $node->attributes->length > 0;

        // Text-only element with attributes: wrap the text so the attributes
        // can be stored next to it.
        if ($hasAttributes && !is_array($output)) {
            $output = ['@content' => $output];
        }

        if (!is_array($output)) {
            return $output;
        }

        if ($hasAttributes) {
            $attributes = [];
            foreach ($node->attributes as $attrName => $attrNode) {
                $attributes[$attrName] = (string) $attrNode->value;
            }
            $output['@attributes'] = $attributes;
        }

        // foreach iterates over a copy, so reassigning $output below does not
        // disturb the iteration itself.
        foreach ($output as $key => $values) {
            if (in_array($key, self::POSSIBLE_CONTAINERS, true)) {
                // Replace rdf:Bag / rdf:Alt / rdf:Seq by the list of its items.
                // A container without any rdf:li (empty or text-only) is left
                // untouched; the is_array() check avoids calling
                // array_key_exists() on a string.
                if (!is_array($values[0]) || !array_key_exists(self::RDF_LI, $values[0])) {
                    break;
                }

                $output = $values[0][self::RDF_LI];
            } elseif (is_array($values) && count($values) === 1 && $key !== '@attributes') {
                // Single occurrence of a tag: drop the wrapping list.
                $output[$key] = $values[0];
            }
        }

        return $output;
    }

    /**
     * Extracts the XMP metadata from raw (possibly binary) content.
     *
     * @param string $content Raw content that may embed an XMP packet.
     *
     * @return array The converted metadata with the root tag name under
     *               '@root', or an empty array when no packet is present or
     *               the packet is not well-formed XML.
     */
    public function extractFromContent(string $content): array
    {
        $xml = $this->getXmpXmlString($content);
        if ($xml === '') {
            // No packet found; loadXML() must not be called with an empty string.
            return [];
        }

        // loadXML() reports malformed XML through warnings and a false return
        // value, not through exceptions, so collect libxml errors internally
        // and check the result explicitly.
        $previousSetting = libxml_use_internal_errors(true);

        try {
            $doc = new DOMDocument();
            if (!$doc->loadXML($xml) || $doc->documentElement === null) {
                return [];
            }

            $root = $doc->documentElement;
            $output = $this->convertDomNode($root);
            if (!is_array($output)) {
                // Root element that only holds text.
                $output = ['@content' => $output];
            }
            $output['@root'] = $root->tagName;

            return $output;
        } catch (\Exception $e) {
            return [];
        } finally {
            if (!$previousSetting) {
                // Only discard errors that were buffered because of this call.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Extracts the XMP metadata from a file.
     *
     * @param string $file Path of the file to read.
     *
     * @return array See extractFromContent().
     *
     * @throws FileNotFoundException When the file cannot be read.
     */
    public function extractFromFile(string $file): array
    {
        try {
            $fileInfo = new SplFileInfo($file);
            $contents = file_get_contents($fileInfo->getPathname());
        } catch (\Exception $e) {
            // Reached when an error handler turns the read warning into an exception.
            throw new FileNotFoundException('The given File could not be found.');
        }

        // Without such a handler file_get_contents() signals failure by
        // returning false.
        if ($contents === false) {
            throw new FileNotFoundException('The given File could not be found.');
        }

        return $this->extractFromContent($contents);
    }

    /**
     * Cuts the XMP packet out of the given content.
     *
     * @param string $content Raw content that may embed an XMP packet.
     *
     * @return string The packet from the opening up to and including the
     *                closing xmpmeta tag, or an empty string when either tag
     *                is missing.
     */
    private function getXmpXmlString(string $content): string
    {
        $openingTag = '<' . $this->namespace . ':xmpmeta';
        $closingTag = '</' . $this->namespace . ':xmpmeta>';

        $start = strpos($content, $openingTag);
        if ($start === false) {
            return '';
        }

        // Only a closing tag located after the opening tag is relevant.
        $end = strpos($content, $closingTag, $start);
        if ($end === false) {
            return '';
        }

        return substr($content, $start, $end - $start + strlen($closingTag));
    }
}
||
| 135 |
The `break` statement is not necessary if it is preceded, for example, by a `return` statement.
If you would like to keep this construct to be consistent with other `case` statements, you can safely mark this issue as a false-positive.