| 1 | <?php |
||||
| 2 | namespace EWW\Dpf\Helper; |
||||
| 3 | |||||
| 4 | /* |
||||
| 5 | * This file is part of the TYPO3 CMS project. |
||||
| 6 | * |
||||
| 7 | * It is free software; you can redistribute it and/or modify it under |
||||
| 8 | * the terms of the GNU General Public License, either version 2 |
||||
| 9 | * of the License, or any later version. |
||||
| 10 | * |
||||
| 11 | * For the full copyright and license information, please read the |
||||
| 12 | * LICENSE.txt file that was distributed with this source code. |
||||
| 13 | * |
||||
| 14 | * The TYPO3 project - inspiring people to share! |
||||
| 15 | */ |
||||
| 16 | |||||
| 17 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||
| 18 | |||||
class DataCiteXml
{

    /**
     * Namespace URI bound to the "mods" prefix in all XPath queries.
     */
    const MODS_NAMESPACE = 'http://www.loc.gov/mods/v3';

    /**
     * Namespace URI bound to the "slub" prefix in all XPath queries.
     */
    const SLUB_NAMESPACE = 'http://slub-dresden.de/';

    /**
     * DOI used when the METS document carries no mods:identifier of type "doi"
     * (http://www.doi.org/index.html).
     */
    const DEFAULT_DOI = '10.1000/1';

    /**
     * Generates DataCite.xml from a given METS.xml
     *
     * The following values are read from the MODS part of the document and
     * written into a DataCite kernel-4 <resource>: DOI, personal names with
     * role code "aut" or "cmp" (creators), primary title and subtitles,
     * publisher (corporate name with role "pbl", "dgg" or "edt"), publication
     * year, subjects (comma separated classification with authority "z"),
     * language and the slub:documentType as resource type.
     *
     * Every extracted value is XML-escaped before it is placed into the
     * DataCite template, so characters such as "&" or "<" in the metadata
     * cannot break the generated document.
     *
     * @param string $metsXml
     * @return string $dataCiteXml
     * @throws \InvalidArgumentException if $metsXml cannot be parsed as XML
     * @throws \RuntimeException if the generated DataCite XML cannot be loaded
     */
    public static function convertFromMetsXml($metsXml)
    {
        // Pass the default class name and 0 instead of NULL: the third
        // argument ($options) is an integer.
        $mets = simplexml_load_string(
            $metsXml,
            'SimpleXMLElement',
            0,
            "http://www.w3.org/2001/XMLSchema-instance"
        );
        if ($mets === false) {
            throw new \InvalidArgumentException('The given METS XML could not be parsed.');
        }
        $mets->registerXPathNamespace('mods', self::MODS_NAMESPACE);
        $mets->registerXPathNamespace('slub', self::SLUB_NAMESPACE);

        // doi
        $doi = self::firstValue($mets, "//mods:identifier[@type='doi']");
        if ($doi === null) {
            $doi = self::DEFAULT_DOI;
        }
        $dataCiteDoi = self::escape($doi);

        // creators
        $creators = array();
        foreach (self::query($mets, "//mods:name[@type='personal']") as $person) {
            // Namespace registrations are not inherited by XPath result nodes.
            $person->registerXPathNamespace('mods', self::MODS_NAMESPACE);

            // Only authors and composers are exported; names without a role
            // code are skipped as well.
            $role = self::firstValue($person, ".//mods:roleTerm[@type='code']");
            if ($role !== 'aut' && $role !== 'cmp') {
                continue;
            }

            $givenName   = self::firstValue($person, ".//mods:namePart[@type='given']");
            $familyName  = self::firstValue($person, ".//mods:namePart[@type='family']");
            $displayName = self::firstValue($person, ".//mods:namePart[@type='displayForm']");

            $names = '';
            if ($displayName !== null) {
                $names .= '<creatorName>' . self::escape($displayName) . '</creatorName>';
            } elseif ($givenName !== null && $familyName !== null) {
                $names .= '<creatorName>' . self::escape("{$familyName}, {$givenName}") . '</creatorName>';
            }
            if ($givenName !== null) {
                $names .= '<givenName>' . self::escape($givenName) . '</givenName>';
            }
            if ($familyName !== null) {
                $names .= '<familyName>' . self::escape($familyName) . '</familyName>';
            }
            $creators[] = "<creator>{$names}</creator>";
        }
        // Identical creator entries are emitted only once.
        $dataCiteCreator = implode('', array_unique($creators));

        // title
        $title = self::firstValue($mets, "//mods:titleInfo[@usage='primary']/mods:title");
        $dataCiteTitle = ($title !== null) ? '<title>' . self::escape($title) . '</title>' : '';

        // subtitles
        foreach (self::query($mets, "//mods:titleInfo[@usage='primary']/mods:subTitle") as $subTitle) {
            if (!empty($subTitle)) {
                $dataCiteTitle .= '<title titleType="Subtitle">' . self::escape($subTitle) . '</title>';
            }
        }

        // publisher
        $publisher = '';
        foreach (self::query($mets, "//mods:name[@type='corporate']") as $corporation) {
            $corporation->registerXPathNamespace('mods', self::MODS_NAMESPACE);
            $role = (string) self::firstValue($corporation, ".//mods:roleTerm[@type='code']");
            $name = (string) self::firstValue($corporation, ".//mods:namePart");
            if ($role == 'pbl') {
                // A corporate name with role "pbl" always wins; stop looking.
                $publisher = $name;
                break;
            } elseif ($role == 'dgg' || ($role == 'edt' && $publisher == '')) {
                // "dgg" overrides earlier candidates, "edt" is only a fallback.
                $publisher = $name;
            }
        }
        $dataCitePublisher = self::escape($publisher);

        // publication year
        $year = self::firstValue($mets, "//mods:originInfo[@eventType='publication']/mods:dateIssued");
        if ($year === null) {
            $year = self::firstValue($mets, "//mods:originInfo/mods:dateIssued");
        }
        // Keep only the leading four characters and accept them only if they
        // form a year between 1900 and 2099.
        $year = (string) substr(trim((string) $year), 0, 4);
        $dataCitePublicationYear = preg_match('/^(19|20)\d{2}$/', $year) ? $year : '';

        // subjects
        $dataCiteSubjects = '';
        $classification = self::firstValue($mets, "//mods:classification[@authority='z']");
        if ($classification !== null) {
            // Third argument drops empty entries so no empty <subject/> is written.
            foreach (GeneralUtility::trimExplode(',', $classification, true) as $subject) {
                $dataCiteSubjects .= '<subject>' . self::escape($subject) . '</subject>';
            }
        }

        // language
        // The converter is still called when no language term exists (with null).
        $languageCode = self::firstValue(
            $mets,
            "//mods:language/mods:languageTerm[@authority='iso639-2b'][@type='code']"
        );
        $dataCiteLanguage = self::escape(
            \EWW\Dpf\Helper\LanguageCode::convertFrom6392Bto6391($languageCode)
        );

        // resource type
        $dataCiteResourceType = self::escape((string) self::firstValue($mets, "//slub:documentType"));

        $xml = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
<identifier identifierType="DOI">{$dataCiteDoi}</identifier>
<creators>{$dataCiteCreator}</creators>
<titles>{$dataCiteTitle}</titles>
<publisher>{$dataCitePublisher}</publisher>
<publicationYear>{$dataCitePublicationYear}</publicationYear>
<subjects>{$dataCiteSubjects}</subjects>
<language>{$dataCiteLanguage}</language>
<resourceType resourceTypeGeneral="Text">{$dataCiteResourceType}</resourceType>
</resource>
XML;

        // Load the template directly into DOM for pretty printing; blank text
        // nodes are dropped so formatOutput can re-indent the document.
        $dataCiteXml = new \DOMDocument('1.0', 'UTF-8');
        $dataCiteXml->preserveWhiteSpace = false;
        $dataCiteXml->formatOutput = true;
        if ($dataCiteXml->loadXML($xml) === false) {
            throw new \RuntimeException('The generated DataCite XML could not be loaded.');
        }

        return $dataCiteXml->saveXML();
    }

    /**
     * Runs an XPath query and always returns an array, even if the query fails.
     *
     * @param \SimpleXMLElement $node Context node with the needed prefixes registered
     * @param string $path XPath expression
     * @return \SimpleXMLElement[]
     */
    private static function query(\SimpleXMLElement $node, $path)
    {
        $result = $node->xpath($path);
        return is_array($result) ? $result : array();
    }

    /**
     * Returns the string value of the first node matched by an XPath query.
     *
     * @param \SimpleXMLElement $node Context node with the needed prefixes registered
     * @param string $path XPath expression
     * @return string|null The node text, or null if nothing matched
     */
    private static function firstValue(\SimpleXMLElement $node, $path)
    {
        $result = self::query($node, $path);
        return isset($result[0]) ? (string) $result[0] : null;
    }

    /**
     * Escapes a value for use as XML character data.
     *
     * @param mixed $value Value that can be cast to string
     * @return string
     */
    private static function escape($value)
    {
        return htmlspecialchars((string) $value, ENT_XML1 | ENT_QUOTES, 'UTF-8');
    }
}
||||
| 152 |