1 | <?php |
||
60 | class JsonLD extends AbstractParser |
||
61 | { |
||
62 | /** |
||
63 | * Format |
||
64 | * |
||
65 | * @var int |
||
66 | */ |
||
67 | const FORMAT = Format::JSON_LD; |
||
68 | /** |
||
69 | * Regex pattern for matching leading comments in a JSON string |
||
70 | * |
||
71 | * @var string |
||
72 | */ |
||
73 | const JSON_COMMENT_PATTERN = '#(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|([\s\t]//.*)|(^//.*)#'; |
||
74 | /** |
||
75 | * Vocabulary cache |
||
76 | * |
||
77 | * @var VocabularyCache |
||
78 | */ |
||
79 | protected $vocabularyCache; |
||
80 | /** |
||
81 | * Context loader |
||
82 | * |
||
83 | * @var CachingContextLoader |
||
84 | */ |
||
85 | protected $contextLoader; |
||
86 | |||
87 | /** |
||
88 | * JSON-LD parser constructor |
||
89 | * |
||
90 | * @param UriInterface $uri Base URI |
||
91 | * @param LoggerInterface $logger Logger |
||
92 | */ |
||
93 | 6 | public function __construct(UriInterface $uri, LoggerInterface $logger) |
|
99 | |||
100 | /** |
||
101 | * Parse a DOM document |
||
102 | * |
||
103 | * @param \DOMDocument $dom DOM Document |
||
104 | * |
||
105 | * @return ParsingResultInterface Micro information items |
||
106 | * @throws \ReflectionException |
||
107 | */ |
||
108 | 5 | public function parseDom(\DOMDocument $dom) |
|
127 | |||
128 | /** |
||
129 | * Parse a JSON-LD document |
||
130 | * |
||
131 | * @param string $jsonLDDocSource JSON-LD document |
||
132 | * |
||
133 | * @return array Items |
||
134 | */ |
||
135 | 5 | protected function parseDocument($jsonLDDocSource) |
|
155 | |||
156 | /** |
||
157 | * Parse a JSON-LD root node |
||
158 | * |
||
159 | * @param \stdClass $jsonLDRoot JSON-LD root node |
||
160 | */ |
||
161 | 3 | protected function parseRootNode($jsonLDRoot) |
|
180 | |||
181 | /** |
||
182 | * Parse a JSON-LD node |
||
183 | * |
||
184 | * @param NodeInterface $node Node |
||
185 | * |
||
186 | * @return \stdClass Item |
||
187 | */ |
||
188 | 3 | protected function parseNode(NodeInterface $node) |
|
196 | |||
/**
 * Determine the expanded type IRIs of a JSON-LD node
 *
 * Blank nodes never yield a type. For all other nodes the declared type(s)
 * are normalized to a list, empty entries are discarded and the ID of each
 * remaining type is expanded through the vocabulary cache.
 *
 * @param NodeInterface $node Node
 *
 * @return array Expanded item types (empty if the node is blank or untyped)
 */
protected function parseNodeType(NodeInterface $node): array
{
    // Blank nodes are reported without any type
    if ($node->isBlankNode()) {
        return [];
    }

    /** @var NodeInterface|NodeInterface[] $declaredTypes */
    $declaredTypes = $node->getType();
    if (!is_array($declaredTypes)) {
        // A single type (or no type at all) is wrapped so both cases share one code path
        $declaredTypes = [$declaredTypes];
    }

    // array_filter() drops empty entries but keeps the keys, so re-index the result as a plain list
    return array_values(
        array_map(
            function ($typeNode) {
                return $this->vocabularyCache->expandIRI($typeNode->getId());
            },
            array_filter($declaredTypes)
        )
    );
}
||
226 | |||
/**
 * Collect the properties of a JSON-LD node
 *
 * Every property except the node type is initialized in the result set,
 * its raw value is parsed and the parsed value is then merged into the
 * property's value list.
 *
 * @param NodeInterface $node Node
 *
 * @return array Item properties, keyed by property name
 */
protected function parseNodeProperties(NodeInterface $node)
{
    $properties = [];

    foreach ($node->getProperties() as $name => $property) {
        // The node type is not treated as a regular property
        if ($name !== Node::TYPE) {
            // Make sure a container for this property exists before values are added
            $this->initializeNodeProperty($name, $properties);

            // Parse the raw value, then register the result
            $parsedValue = $this->parse($property);
            $this->processNodeProperty($name, $parsedValue, $properties);
        }
    }

    return $properties;
}
||
254 | |||
/**
 * Ensure a property container exists for the given name
 *
 * If the property is not registered yet (or its entry is empty), the name is
 * expanded through the vocabulary cache and the result is given an empty
 * `values` list before being stored under the property name.
 *
 * @param string $name      Property name
 * @param array $properties Item properties (modified in place)
 */
protected function initializeNodeProperty($name, array &$properties)
{
    // Nothing to do if the property has already been set up
    if (!empty($properties[$name])) {
        return;
    }

    // NOTE(review): expandIRI() is presumably returning an object here — confirm against VocabularyCache
    $descriptor         = $this->vocabularyCache->expandIRI($name);
    $descriptor->values = [];
    $properties[$name]  = $descriptor;
}
|
268 | |||
/**
 * Process a property value
 *
 * Objects are handed over to processNodePropertyObject(), arrays are
 * processed element by element (recursively) and any remaining value is
 * appended to the property's value list unless it is empty.
 *
 * A value counts as empty only if it is null, false or an empty string.
 * Numeric zero values (e.g. "0", 0, 0.0) are legitimate property values
 * and are retained.
 *
 * The property must have been initialized (see initializeNodeProperty())
 * before values are added.
 *
 * @param string $name                  Property name
 * @param \stdClass|array|string $value Property value
 * @param array $properties             Item properties (modified in place)
 */
protected function processNodeProperty($name, $value, array &$properties)
{
    // Nested item or value object
    if (is_object($value)) {
        $this->processNodePropertyObject($name, $value, $properties);

        return;
    }

    // Value list: process each entry on its own
    if (is_array($value)) {
        foreach ($value as $listValue) {
            $this->processNodeProperty($name, $listValue, $properties);
        }

        return;
    }

    // Plain value: a bare truthiness test would also discard "0" / 0 / 0.0,
    // so only genuinely empty values are skipped
    if ($value !== null && $value !== false && $value !== '') {
        $properties[$name]->values[] = $value;
    }
}
|
293 | |||
/**
 * Process a property value object
 *
 * An object carrying a non-empty `type` or `lang` is stored as a whole.
 * Otherwise, if it has a non-empty `id`, only that ID is stored. Objects
 * matching neither condition are discarded.
 *
 * @param string $name      Property name
 * @param \stdClass $value  Property value
 * @param array $properties Properties (modified in place)
 */
protected function processNodePropertyObject($name, $value, array &$properties)
{
    $hasTypeOrLanguage = !empty($value->type) || !empty($value->lang);

    // Typed or language tagged objects are kept in full
    if ($hasTypeOrLanguage) {
        $properties[$name]->values[] = $value;

        return;
    }

    // Objects that only carry an ID are reduced to that ID
    if (!empty($value->id)) {
        $properties[$name]->values[] = $value->id;
    }
}
|
311 | |||
/**
 * Parse a JSON-LD fragment
 *
 * Dispatches on the fragment type: nodes, language tagged strings and typed
 * values are delegated to their dedicated parser methods. Anything else is
 * treated as a list whose entries are parsed recursively.
 *
 * @param NodeInterface|LanguageTaggedString|TypedValue|array $jsonLD JSON-LD fragment
 *
 * @return \stdClass|string|array Parsed fragment
 */
protected function parse($jsonLD)
{
    // Node object
    if ($jsonLD instanceof NodeInterface) {
        return $this->parseNode($jsonLD);
    }

    // Language tagged string
    if ($jsonLD instanceof LanguageTaggedString) {
        return $this->parseLanguageTaggedString($jsonLD);
    }

    // Typed value
    if ($jsonLD instanceof TypedValue) {
        return $this->parseTypedValue($jsonLD);
    }

    // List of fragments: parse every entry
    return array_map(
        function ($fragment) {
            return $this->parse($fragment);
        },
        $jsonLD
    );
}
||
337 | |||
/**
 * Parse a language tagged string
 *
 * Converts the string into a plain object exposing its text as `value`
 * and its language as `lang`.
 *
 * @param LanguageTaggedString $value Language tagged string
 *
 * @return \stdClass Value object with `value` and `lang` properties
 */
protected function parseLanguageTaggedString(LanguageTaggedString $value)
{
    $taggedValue        = new \stdClass();
    $taggedValue->value = $value->getValue();
    $taggedValue->lang  = $value->getLanguage();

    return $taggedValue;
}
||
349 | |||
350 | /** |
||
351 | * Parse a typed value |
||
352 | * |
||
353 | * @param TypedValue $value Typed value |
||
354 | * |
||
355 | * @return string Value |
||
356 | */ |
||
357 | 2 | protected function parseTypedValue(TypedValue $value) |
|
361 | |||
362 | 5 | private function sanitizeJsonSource($jsonLDDocSource) |
|
363 | { |
||
364 | 5 | $jsonLDDocSource = trim($jsonLDDocSource); |
|
365 | |||
366 | 5 | if (substr($jsonLDDocSource, -1) === ';') { |
|
367 | 1 | $jsonLDDocSource = substr_replace($jsonLDDocSource, '', -1); |
|
368 | } |
||
372 | } |
||
373 |