Completed
Push — master ( d07dd4...376063 )
by Ankur
03:33
created

JsonLD   A

Complexity

Total Complexity 39

Size/Duplication

Total Lines 355
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 9

Test Coverage

Coverage 98.15%

Importance

Changes 0
Metric Value
wmc 39
lcom 1
cbo 9
dl 0
loc 355
ccs 106
cts 108
cp 0.9815
rs 9.28
c 0
b 0
f 0

14 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
A parseDom() 0 19 2
A parseDocument() 0 20 4
A parseRootNode() 0 19 3
A parseNode() 0 20 3
A parseNodeType() 0 19 4
A parseNodeProperties() 0 20 3
A initializeNodeProperty() 0 7 2
A processNodeProperty() 0 17 5
A processNodePropertyObject() 0 10 4
A parse() 0 18 4
A parseLanguageTaggedString() 0 4 1
A parseTypedValue() 0 4 1
A sanitizeJsonSource() 0 37 2
1
<?php
2
3
/**
4
 * micrometa
5
 *
6
 * @category   Jkphl
7
 * @package    Jkphl\Micrometa
8
 * @subpackage Jkphl\Micrometa\Infrastructure\Parser
9
 * @author     Joschi Kuphal <[email protected]> / @jkphl
10
 * @copyright  Copyright © 2018 Joschi Kuphal <[email protected]> / @jkphl
11
 * @license    http://opensource.org/licenses/MIT The MIT License (MIT)
12
 */
13
14
/***********************************************************************************
15
 *  The MIT License (MIT)
16
 *
17
 *  Copyright © 2018 Joschi Kuphal <[email protected]> / @jkphl
18
 *
19
 *  Permission is hereby granted, free of charge, to any person obtaining a copy of
20
 *  this software and associated documentation files (the "Software"), to deal in
21
 *  the Software without restriction, including without limitation the rights to
22
 *  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
23
 *  the Software, and to permit persons to whom the Software is furnished to do so,
24
 *  subject to the following conditions:
25
 *
26
 *  The above copyright notice and this permission notice shall be included in all
27
 *  copies or substantial portions of the Software.
28
 *
29
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
31
 *  FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
32
 *  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
33
 *  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34
 *  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35
 ***********************************************************************************/
36
37
namespace Jkphl\Micrometa\Infrastructure\Parser;
38
39
use Jkphl\Micrometa\Application\Contract\ParsingResultInterface;
40
use Jkphl\Micrometa\Infrastructure\Parser\JsonLD\CachingContextLoader;
41
use Jkphl\Micrometa\Infrastructure\Parser\JsonLD\VocabularyCache;
42
use Jkphl\Micrometa\Ports\Format;
43
use ML\JsonLD\Exception\JsonLdException;
44
use ML\JsonLD\JsonLD as JsonLDParser;
45
use ML\JsonLD\LanguageTaggedString;
46
use ML\JsonLD\Node;
47
use ML\JsonLD\NodeInterface;
48
use ML\JsonLD\TypedValue;
49
use Psr\Http\Message\UriInterface;
50
use Psr\Log\LoggerInterface;
51
52
/**
53
 * JsonLD parser
54
 *
55
 * @package    Jkphl\Micrometa
56
 * @subpackage Jkphl\Micrometa\Infrastructure
57
 * @see        https://jsonld-examples.com/
58
 * @see        http://www.dr-chuck.com/csev-blog/2016/04/json-ld-performance-sucks-for-api-specs/
59
 */
60
class JsonLD extends AbstractParser
61
{
62
    /**
63
     * Format
64
     *
65
     * @var int
66
     */
67
    const FORMAT = Format::JSON_LD;
68
    /**
69
     * Regex pattern for matching C-style block comments anywhere in a JSON string (the pattern is not anchored)
70
     *
71
     * @var string
72
     */
73
    const JSON_COMMENT_PATTERN = '#(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)#';
74
    /**
75
     * Vocabulary cache
76
     *
77
     * @var VocabularyCache
78
     */
79
    protected $vocabularyCache;
80
    /**
81
     * Context loader
82
     *
83
     * @var CachingContextLoader
84
     */
85
    protected $contextLoader;
86
    /**
87
     * Array for keeping track of the hierarchy of objects, to prevent recursion
88
     *
89
     * @var NodeInterface[]
90
     */
91
    protected $chain = [];
92
93
    /**
     * Create a JSON-LD parser
     *
     * After the parent constructor has run, a new vocabulary cache is created and passed
     * to a new caching context loader, so both properties refer to the same cache instance.
     *
     * @param UriInterface $uri       Base URI
     * @param LoggerInterface $logger Logger
     */
    public function __construct(UriInterface $uri, LoggerInterface $logger)
    {
        parent::__construct($uri, $logger);

        $cache                 = new VocabularyCache();
        $this->vocabularyCache = $cache;
        $this->contextLoader   = new CachingContextLoader($cache);
    }
105
106
    /**
     * Extract and parse all JSON-LD documents embedded in a DOM document
     *
     * Every element named "script" with type "application/ld+json" is considered a JSON-LD
     * document. Block comments are stripped from its text content before it is parsed, and
     * the items of all documents are merged into a single result.
     *
     * @param \DOMDocument $dom DOM Document
     *
     * @return ParsingResultInterface Micro information items
     * @throws \ReflectionException
     */
    public function parseDom(\DOMDocument $dom)
    {
        $parserName = (new \ReflectionClass(__CLASS__))->getShortName();
        $this->logger->info('Running parser: '.$parserName);

        // Locate the embedded JSON-LD documents (matching on the local name ignores namespaces)
        $scripts = (new \DOMXPath($dom))->query('//*[local-name(.) = "script"][@type = "application/ld+json"]');
        $this->logger->debug('Processing '.$scripts->length.' JSON-LD documents');

        // Parse the documents one by one and collect their items
        $items = [];
        foreach ($scripts as $script) {
            $source = preg_replace(self::JSON_COMMENT_PATTERN, '', $script->textContent);
            $items  = array_merge($items, $this->parseDocument($source));
        }

        return new ParsingResult(self::FORMAT, $items);
    }
133
134
    /**
     * Parse a single JSON-LD document
     *
     * The source is sanitized and decoded. A decoded list is parsed element by element, a
     * decoded object is parsed as one root node; anything else is logged as invalid.
     * Empty results (e.g. null for a root node that could not be parsed) are filtered out.
     *
     * @param string $jsonLDDocSource JSON-LD document
     *
     * @return array Items
     */
    protected function parseDocument($jsonLDDocSource)
    {
        // Unserialize the sanitized JSON-LD document
        $decoded = @json_decode($this->sanitizeJsonSource($jsonLDDocSource));

        if (is_array($decoded)) {
            $roots = array_map([$this, 'parseRootNode'], $decoded);
        } elseif (is_object($decoded)) {
            $roots = [$this->parseRootNode($decoded)];
        } else {
            // Neither an object nor a list: this is not a usable JSON-LD document
            $this->logger->error('Skipping invalid JSON-LD document');

            return [];
        }

        return array_filter($roots);
    }
161
162
    /**
     * Parse a JSON-LD root node
     *
     * Only the first node of the document graph is parsed. JSON-LD exceptions are logged
     * and not propagated.
     *
     * @param \stdClass $jsonLDRoot JSON-LD root node
     *
     * @return \stdClass|string|null Result of parsing the first graph node, or null if the graph
     *                               has no nodes or a JSON-LD exception was caught
     */
    protected function parseRootNode($jsonLDRoot)
    {
        try {
            $document = JsonLDParser::getDocument($jsonLDRoot, ['documentLoader' => $this->contextLoader]);

            /** @var Node $node */
            foreach ($document->getGraph()->getNodes() as $node) {
                // The first node is the only one of interest
                return $this->parseNode($node);
            }
        } catch (JsonLdException $exception) {
            $this->logger->error($exception->getMessage(), ['exception' => $exception]);
        }

        return null;
    }
186
187
    /**
     * Parse a JSON-LD node
     *
     * To prevent infinite recursion on cyclic graphs, the nodes currently being parsed are
     * tracked in the chain. A node that is already part of the chain is not parsed again;
     * only its ID is returned as a reference.
     *
     * @param NodeInterface $node Node
     *
     * @return \stdClass|string|null Item (with "type", "id" and "properties"), or just the node ID
     *                               (null if the node has no ID) when the node is already being parsed
     */
    protected function parseNode(NodeInterface $node)
    {
        $nodeId = $node->getId() ?: null;

        // If the node is in the current chain, just return the ID reference
        if (in_array($node, $this->chain, true)) {
            return $nodeId;
        }

        // Add the node to the chain while its property tree is parsed. The chain is restored in
        // a finally block so that an exception thrown while parsing the properties cannot leave
        // stale nodes behind for subsequently parsed documents.
        $this->chain[] = $node;
        try {
            $properties = $this->parseNodeProperties($node);
        } finally {
            array_pop($this->chain);
        }

        return (object)[
            'type'       => $this->parseNodeType($node),
            'id'         => $nodeId,
            'properties' => $properties,
        ];
    }
214
215
    /**
     * Determine the expanded type IRIs of a JSON-LD node
     *
     * The node type may be a single value or a list; empty entries are ignored. Each
     * remaining type is expanded through the vocabulary cache using its ID.
     *
     * @param NodeInterface $node Node
     *
     * @return array Item type
     */
    protected function parseNodeType(NodeInterface $node): array
    {
        /** @var NodeInterface|NodeInterface[] $declaredTypes */
        $declaredTypes = $node->getType();
        if (!is_array($declaredTypes)) {
            $declaredTypes = [$declaredTypes];
        }

        $expandedTypes = [];
        foreach (array_filter($declaredTypes) as $declaredType) {
            $expandedTypes[] = $this->vocabularyCache->expandIRI($declaredType->getId());
        }

        return $expandedTypes;
    }
241
242
    /**
     * Parse the properties of a JSON-LD node
     *
     * The node type property is skipped. Every other property is initialized (if necessary)
     * and its parsed value is added to the result.
     *
     * @param NodeInterface $node Node
     *
     * @return array Item properties
     */
    protected function parseNodeProperties(NodeInterface $node)
    {
        $properties = [];

        foreach ($node->getProperties() as $name => $property) {
            // Everything but the node type is a regular property
            if ($name !== Node::TYPE) {
                $this->initializeNodeProperty($name, $properties);
                $this->processNodeProperty($name, $this->parse($property), $properties);
            }
        }

        return $properties;
    }
269
270
    /**
     * Initialize a JSON-LD node property (if necessary)
     *
     * If the property has no (non-empty) entry yet, the property name is expanded through the
     * vocabulary cache and the result is registered with an empty value list.
     *
     * @param string $name      Property name
     * @param array $properties Item properties
     */
    protected function initializeNodeProperty($name, array &$properties)
    {
        // Nothing to do if the property has been initialized before
        if (!empty($properties[$name])) {
            return;
        }

        $property          = $this->vocabularyCache->expandIRI($name);
        $property->values  = [];
        $properties[$name] = $property;
    }
283
284
    /**
     * Process a property value
     *
     * Objects are handed over to processNodePropertyObject(), lists are processed recursively
     * element by element, and any other non-empty value is appended to the value list of the
     * property. Only null, false and the empty string are considered empty; in particular the
     * literal "0" is a legitimate value and is kept.
     *
     * @param string $name                  Property name
     * @param \stdClass|array|string $value Property value
     * @param array $properties             Item properties
     */
    protected function processNodeProperty($name, $value, array &$properties)
    {
        // If this is a nested item
        if (is_object($value)) {
            $this->processNodePropertyObject($name, $value, $properties);

            return;
        }

        // Else: If this is a value list
        if (is_array($value)) {
            foreach ($value as $listValue) {
                $this->processNodeProperty($name, $listValue, $properties);
            }

            return;
        }

        // Else: If the value is not empty. A plain truthiness check must not be used here,
        // as it would also discard the string "0".
        if (($value !== null) && ($value !== false) && ($value !== '')) {
            $properties[$name]->values[] = $value;
        }
    }
308
309
    /**
     * Process a property value object
     *
     * Objects with a non-empty "type" or "lang" are added to the property values as they are.
     * Objects with neither but a non-empty "id" are reduced to that ID. Anything else is ignored.
     *
     * @param string $name      Property name
     * @param \stdClass $value  Property value
     * @param array $properties Properties
     */
    protected function processNodePropertyObject($name, $value, array &$properties)
    {
        $isTypedOrLocalized = !empty($value->type) || !empty($value->lang);

        if ($isTypedOrLocalized) {
            $properties[$name]->values[] = $value;

            return;
        }

        // @type = @id
        if (!empty($value->id)) {
            $properties[$name]->values[] = $value->id;
        }
    }
326
327
    /**
     * Parse a JSON-LD fragment
     *
     * Dispatches on the fragment type: nodes, language tagged strings and typed values are
     * handled by their dedicated methods; everything else is treated as a list whose elements
     * are parsed recursively.
     *
     * @param NodeInterface|LanguageTaggedString|TypedValue|array $jsonLD JSON-LD fragment
     *
     * @return \stdClass|string|array|null Parsed fragment
     */
    protected function parse($jsonLD)
    {
        // Node object
        if ($jsonLD instanceof NodeInterface) {
            return $this->parseNode($jsonLD);
        }

        // Language tagged string
        if ($jsonLD instanceof LanguageTaggedString) {
            return $this->parseLanguageTaggedString($jsonLD);
        }

        // Typed value
        if ($jsonLD instanceof TypedValue) {
            return $this->parseTypedValue($jsonLD);
        }

        // List of items
        return array_map([$this, 'parse'], $jsonLD);
    }
352
353
    /**
     * Parse a language tagged string
     *
     * @param LanguageTaggedString $value Language tagged string
     *
     * @return \stdClass Value object with the properties "value" and "lang"
     */
    protected function parseLanguageTaggedString(LanguageTaggedString $value)
    {
        $taggedString        = new \stdClass();
        $taggedString->value = $value->getValue();
        $taggedString->lang  = $value->getLanguage();

        return $taggedString;
    }
364
365
    /**
     * Parse a typed value
     *
     * Only the value itself is returned.
     *
     * @param TypedValue $value Typed value
     *
     * @return string Value
     */
    protected function parseTypedValue(TypedValue $value)
    {
        $plainValue = $value->getValue();

        return $plainValue;
    }
376
377 6
    /**
     * Sanitize the source of a JSON-LD document prior to decoding
     *
     * Surrounding whitespace and a single trailing semicolon are always removed. If the
     * remaining source already is valid JSON it is returned untouched: the repair steps
     * below are lossy heuristics (they strip tag attributes, delete whitespace inside string
     * values and rewrite double quotes) and would corrupt valid documents such as
     * `{"a" : 1}` or strings containing escaped quotes. Only sources that fail to decode
     * are run through the repair steps.
     *
     * @param string $jsonLDDocSource JSON-LD document source
     *
     * @return string Sanitized JSON-LD document source
     */
    private function sanitizeJsonSource($jsonLDDocSource)
    {
        $jsonLDDocSource = trim((string)$jsonLDDocSource);

        // Strip a single trailing semicolon
        if (substr($jsonLDDocSource, -1) === ';') {
            $jsonLDDocSource = trim(substr($jsonLDDocSource, 0, -1));
        }

        // Valid JSON must not be altered
        json_decode($jsonLDDocSource);
        if (json_last_error() === JSON_ERROR_NONE) {
            return $jsonLDDocSource;
        }

        // Repair steps (pattern => replacement), applied in order
        $repairs = [
            // Remove any attributes in tags
            '/<([a-zA-Z]+)(>|.*?[^?]>)/' => '<$1>',
            // Reduce runs of two or more line breaks to exactly two
            '/(<br>){2,}/'               => '<br><br>',
            // Delete runs of two or more spaces as well as tabs and newlines
            '/[ ]{2,}|[\t]|[\n]/'        => '',
            // Remove whitespace between `",` and `"`
            '/(\",\s+\")/'               => '","',
            // Remove whitespace around a colon between two double quotes
            '/(\"\s*:\s*\")/'            => '":"',
            // Remove whitespace following an opening curly brace
            '/({\s*)/'                   => '{',
            // Remove whitespace around a colon followed by `{` or `[`
            '/(\")\s*:\s*({|\[)/'        => '":$2',
            // Remove whitespace preceding `}` or `]`
            '/\s+(}|\])/'                => '$1',
            // Turn double quotes that do not look structural (not preceded by `{`, `,`, `:`, `[`
            // and not followed by `}`, `,`, `:`, `]`) into single quotes
            '/([^{,:\[])"(?![},:\]])/'   => '$1\'',
        ];

        foreach ($repairs as $pattern => $replacement) {
            $repaired = preg_replace($pattern, $replacement, $jsonLDDocSource);

            // preg_replace() returns null on a PCRE error: keep the previous state in that case
            if ($repaired !== null) {
                $jsonLDDocSource = $repaired;
            }
        }

        return $jsonLDDocSource;
    }
414
}
415