Completed
Push — master ( 9ef143...3002ea )
by Ankur
03:19
created

JsonLD::sanitizeJsonSource()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 13
ccs 6
cts 6
cp 1
rs 9.8333
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
/**
4
 * micrometa
5
 *
6
 * @category   Jkphl
7
 * @package    Jkphl\Micrometa
8
 * @subpackage Jkphl\Micrometa\Infrastructure\Parser
9
 * @author     Joschi Kuphal <[email protected]> / @jkphl
10
 * @copyright  Copyright © 2018 Joschi Kuphal <[email protected]> / @jkphl
11
 * @license    http://opensource.org/licenses/MIT The MIT License (MIT)
12
 */
13
14
/***********************************************************************************
15
 *  The MIT License (MIT)
16
 *
17
 *  Copyright © 2018 Joschi Kuphal <[email protected]> / @jkphl
18
 *
19
 *  Permission is hereby granted, free of charge, to any person obtaining a copy of
20
 *  this software and associated documentation files (the "Software"), to deal in
21
 *  the Software without restriction, including without limitation the rights to
22
 *  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
23
 *  the Software, and to permit persons to whom the Software is furnished to do so,
24
 *  subject to the following conditions:
25
 *
26
 *  The above copyright notice and this permission notice shall be included in all
27
 *  copies or substantial portions of the Software.
28
 *
29
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
31
 *  FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
32
 *  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
33
 *  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34
 *  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35
 ***********************************************************************************/
36
37
namespace Jkphl\Micrometa\Infrastructure\Parser;
38
39
use Jkphl\Micrometa\Application\Contract\ParsingResultInterface;
40
use Jkphl\Micrometa\Infrastructure\Parser\JsonLD\CachingContextLoader;
41
use Jkphl\Micrometa\Infrastructure\Parser\JsonLD\VocabularyCache;
42
use Jkphl\Micrometa\Ports\Format;
43
use ML\JsonLD\Exception\JsonLdException;
44
use ML\JsonLD\JsonLD as JsonLDParser;
45
use ML\JsonLD\LanguageTaggedString;
46
use ML\JsonLD\Node;
47
use ML\JsonLD\NodeInterface;
48
use ML\JsonLD\TypedValue;
49
use Psr\Http\Message\UriInterface;
50
use Psr\Log\LoggerInterface;
51
52
/**
 * JsonLD parser
 *
 * Locates all `<script type="application/ld+json">` elements of a DOM document, strips
 * comments from their text content, decodes the JSON and converts the first graph node
 * of every JSON-LD root into a plain item object (`type`, `id`, `properties`).
 *
 * @package    Jkphl\Micrometa
 * @subpackage Jkphl\Micrometa\Infrastructure
 * @see        https://jsonld-examples.com/
 * @see        http://www.dr-chuck.com/csev-blog/2016/04/json-ld-performance-sucks-for-api-specs/
 */
class JsonLD extends AbstractParser
{
    /**
     * Format
     *
     * @var int
     */
    const FORMAT = Format::JSON_LD;
    /**
     * Regex pattern for matching comments in a JSON string
     *
     * Matches block comments, line comments preceded by a whitespace character and a
     * line comment at the very beginning of the string.
     *
     * @var string
     */
    const JSON_COMMENT_PATTERN = '#(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|([\s\t]//.*)|(^//.*)#';
    /**
     * Vocabulary cache
     *
     * @var VocabularyCache
     */
    protected $vocabularyCache;
    /**
     * Context loader
     *
     * @var CachingContextLoader
     */
    protected $contextLoader;

    /**
     * JSON-LD parser constructor
     *
     * Creates a fresh vocabulary cache and a context loader backed by that cache.
     *
     * @param UriInterface $uri       Base URI
     * @param LoggerInterface $logger Logger
     */
    public function __construct(UriInterface $uri, LoggerInterface $logger)
    {
        parent::__construct($uri, $logger);
        $this->vocabularyCache = new VocabularyCache();
        $this->contextLoader   = new CachingContextLoader($this->vocabularyCache);
    }

    /**
     * Parse a DOM document
     *
     * @param \DOMDocument $dom DOM Document
     *
     * @return ParsingResultInterface Micro information items
     * @throws \ReflectionException
     */
    public function parseDom(\DOMDocument $dom)
    {
        $this->logger->info('Running parser: '.(new \ReflectionClass(__CLASS__))->getShortName());
        $items = [];

        // Find all JSON-LD documents (matching on the local name ignores namespaces)
        $xpath      = new \DOMXPath($dom);
        $jsonLDDocs = $xpath->query('//*[local-name(.) = "script"][@type = "application/ld+json"]');
        $this->logger->debug('Processing '.$jsonLDDocs->length.' JSON-LD documents');

        // Run through all JSON-LD documents
        foreach ($jsonLDDocs as $jsonLDDoc) {
            $rawSource       = $jsonLDDoc->textContent;
            $jsonLDDocSource = preg_replace(self::JSON_COMMENT_PATTERN, '', $rawSource);

            // preg_replace() yields NULL on a PCRE error (e.g. backtrack limit): rather than
            // discarding the document altogether, try to parse it without comment stripping
            if ($jsonLDDocSource === null) {
                $this->logger->warning(
                    'Could not strip comments from JSON-LD document, parsing it unmodified',
                    ['pcreError' => preg_last_error()]
                );
                $jsonLDDocSource = $rawSource;
            }

            $items = array_merge($items, $this->parseDocument($jsonLDDocSource));
        }

        return new ParsingResult(self::FORMAT, $items);
    }

    /**
     * Parse a JSON-LD document
     *
     * The source is sanitized and decoded; a document that doesn't decode to an object or
     * an array is logged and skipped. Root nodes that couldn't be parsed are filtered out.
     *
     * @param string $jsonLDDocSource JSON-LD document
     *
     * @return array Items
     */
    protected function parseDocument($jsonLDDocSource)
    {
        // Unserialize the sanitized JSON-LD document
        $jsonLDDoc = json_decode($this->sanitizeJsonSource($jsonLDDocSource));

        // If this is not a valid JSON document: Return
        if (!is_object($jsonLDDoc) && !is_array($jsonLDDoc)) {
            $this->logger->error('Skipping invalid JSON-LD document');

            return [];
        }

        // Treat a single root object like a one-element list of roots
        $jsonLDRoots = is_array($jsonLDDoc) ? $jsonLDDoc : [$jsonLDDoc];

        // Parse the roots and drop the ones that didn't yield an item
        return array_filter(array_map([$this, 'parseRootNode'], $jsonLDRoots));
    }

    /**
     * Parse a JSON-LD root node
     *
     * Only the first node returned by the document graph is converted into an item.
     * JSON-LD exceptions are logged and result in NULL.
     *
     * @param \stdClass $jsonLDRoot JSON-LD root node
     *
     * @return \stdClass|null Item (NULL if the graph has no nodes or parsing failed)
     */
    protected function parseRootNode($jsonLDRoot)
    {
        try {
            $jsonLDDocument = JsonLDParser::getDocument($jsonLDRoot, ['documentLoader' => $this->contextLoader]);

            // Run through all nodes to parse the first one
            /** @var Node $node */
            foreach ($jsonLDDocument->getGraph()->getNodes() as $node) {
                return $this->parseNode($node);
            }
        } catch (JsonLdException $exception) {
            $this->logger->error($exception->getMessage(), ['exception' => $exception]);
        }

        return null;
    }

    /**
     * Parse a JSON-LD node
     *
     * @param NodeInterface $node Node
     *
     * @return \stdClass Item with the members `type`, `id` (NULL if empty) and `properties`
     */
    protected function parseNode(NodeInterface $node)
    {
        return (object)[
            'type'       => $this->parseNodeType($node),
            'id'         => $node->getId() ?: null,
            'properties' => $this->parseNodeProperties($node),
        ];
    }

    /**
     * Parse the type of a JSON-LD node
     *
     * @param NodeInterface $node Node
     *
     * @return array Item types (expanded via the vocabulary cache), empty if the node is untyped
     */
    protected function parseNodeType(NodeInterface $node): array
    {
        /** @var NodeInterface|NodeInterface[] $itemTypes */
        $itemTypes = $node->getType();

        // Normalize to a list and drop empty entries (e.g. a missing type)
        $itemTypes = array_filter(is_array($itemTypes) ? $itemTypes : [$itemTypes]);

        $types = [];
        foreach ($itemTypes as $itemType) {
            $types[] = $this->vocabularyCache->expandIRI($itemType->getId());
        }

        return $types;
    }

    /**
     * Parse the properties of a JSON-LD node
     *
     * @param NodeInterface $node Node
     *
     * @return array Item properties (keyed by property name)
     */
    protected function parseNodeProperties(NodeInterface $node)
    {
        $properties = [];

        // Run through all node properties
        foreach ($node->getProperties() as $name => $property) {
            // Skip the node type
            if ($name === Node::TYPE) {
                continue;
            }

            // Initialize the property (if necessary)
            $this->initializeNodeProperty($name, $properties);

            // Parse and process the property value
            $this->processNodeProperty($name, $this->parse($property), $properties);
        }

        return $properties;
    }

    /**
     * Initialize a JSON-LD node property (if necessary)
     *
     * Registers the expanded property name under its original name and gives it an
     * empty value list.
     *
     * @param string $name      Property name
     * @param array $properties Item properties
     */
    protected function initializeNodeProperty($name, array &$properties)
    {
        if (empty($properties[$name])) {
            $properties[$name]         = $this->vocabularyCache->expandIRI($name);
            $properties[$name]->values = [];
        }
    }

    /**
     * Process a property value
     *
     * Objects are delegated to processNodePropertyObject(), arrays are processed
     * recursively and scalar values are appended to the property's value list.
     *
     * @param string $name                  Property name
     * @param \stdClass|array|string $value Property value
     * @param array $properties             Item properties
     */
    protected function processNodeProperty($name, $value, array &$properties)
    {
        // If this is a nested item
        if (is_object($value)) {
            $this->processNodePropertyObject($name, $value, $properties);

            // Else: If this is a value list
        } elseif (is_array($value)) {
            foreach ($value as $listValue) {
                $this->processNodeProperty($name, $listValue, $properties);
            }

            // Else: If the value is not empty. A plain truthiness test must not be used
            // here as it would also discard legitimate literals like "0"
        } elseif ($value !== null && $value !== false && $value !== '') {
            $properties[$name]->values[] = $value;
        }
    }

    /**
     * Process a property value object
     *
     * Objects with a non-empty `type` or `lang` member are kept as they are, other objects
     * are reduced to their `id` (if any). Objects with neither are dropped.
     *
     * @param string $name      Property name
     * @param \stdClass $value  Property value
     * @param array $properties Properties
     */
    protected function processNodePropertyObject($name, $value, array &$properties)
    {
        if (!empty($value->type) || !empty($value->lang)) {
            $properties[$name]->values[] = $value;

            // @type = @id
        } elseif (!empty($value->id)) {
            $properties[$name]->values[] = $value->id;
        }
    }

    /**
     * Parse a JSON-LD fragment
     *
     * @param NodeInterface|LanguageTaggedString|TypedValue|array $jsonLD JSON-LD fragment
     *
     * @return \stdClass|string|array Parsed fragment
     */
    protected function parse($jsonLD)
    {
        // If it's a node object
        if ($jsonLD instanceof NodeInterface) {
            return $this->parseNode($jsonLD);
        }

        // Else if it's a language tagged string
        if ($jsonLD instanceof LanguageTaggedString) {
            return $this->parseLanguageTaggedString($jsonLD);
        }

        // Else if it's a typed value
        if ($jsonLD instanceof TypedValue) {
            return $this->parseTypedValue($jsonLD);
        }

        // Else it's a list of items
        return array_map([$this, 'parse'], $jsonLD);
    }

    /**
     * Parse a language tagged string
     *
     * @param LanguageTaggedString $value Language tagged string
     *
     * @return \stdClass Value object with the members `value` and `lang`
     */
    protected function parseLanguageTaggedString(LanguageTaggedString $value)
    {
        return (object)['value' => $value->getValue(), 'lang' => $value->getLanguage()];
    }

    /**
     * Parse a typed value
     *
     * @param TypedValue $value Typed value
     *
     * @return string Value
     */
    protected function parseTypedValue(TypedValue $value)
    {
        return $value->getValue();
    }

    /**
     * Sanitize a JSON-LD document source prior to decoding
     *
     * Trims the source, removes a single trailing semicolon and replaces tabs, line breaks
     * (LF and CR) and runs of multiple spaces with a single space each, so that raw line
     * breaks inside string values don't make the document undecodable.
     *
     * @param string $jsonLDDocSource JSON-LD document source
     *
     * @return string Sanitized JSON-LD document source
     */
    private function sanitizeJsonSource($jsonLDDocSource)
    {
        $jsonLDDocSource = trim((string)$jsonLDDocSource);

        // Strip a trailing semicolon (JavaScript statement terminator)
        if (substr($jsonLDDocSource, -1) === ';') {
            $jsonLDDocSource = trim(substr_replace($jsonLDDocSource, '', -1));
        }

        // Replace tabs, line breaks and extra spaces with single spaces
        $sanitized = preg_replace('/ {2,}|[\t\r\n]/', ' ', $jsonLDDocSource);

        // preg_replace() yields NULL on a PCRE error: fall back to the trimmed source
        return ($sanitized === null) ? $jsonLDDocSource : $sanitized;
    }
}
373