<?php

namespace YusufKandemir\MicrodataParser;

/**
 * DOM element extended with helpers that implement parts of the W3C microdata
 * extraction algorithms (item properties, property names, property values).
 *
 * NOTE(review): the traversal helpers call methods of this class on other nodes of the
 * document, so the class presumably has to be registered as the document's element class
 * (DOMDocument::registerNodeClass) - confirm against the parser setup.
 */
class MicrodataDOMElement extends \DOMElement
{
    /**
     * Tag names whose property value is a URL, mapped to the attribute that holds it.
     *
     * @var array<string, string>
     */
    private static $uriAttributes = [
        'audio' => 'src',
        'embed' => 'src',
        'iframe' => 'src',
        'img' => 'src',
        'source' => 'src',
        'track' => 'src',
        'video' => 'src',
        'a' => 'href',
        'area' => 'href',
        'link' => 'href',
        'object' => 'data',
    ];

    /**
     * Tag names whose property value is a plain attribute value, mapped to that attribute.
     *
     * @var array<string, string>
     */
    private static $plainAttributes = [
        'data' => 'value',
        'meter' => 'value',
        'time' => 'datetime',
    ];

    /**
     * Collects the elements that carry the properties of this item.
     *
     * Starting from this element's children and the elements referenced through "itemref",
     * the tree is walked depth-first in pre-order. Elements with "itemscope" are not descended
     * into (their children belong to the nested item), and every element is visited at most once.
     *
     * The result is in document order within each starting subtree; subtrees are ordered as
     * children first, then referenced elements in "itemref" order.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-item-properties for details of algorithm
     *
     * @return array Elements that have at least one property name
     */
    public function getProperties() : array
    {
        $results = [];
        $memory = [$this];

        // $pending is used as a stack; it is stored reversed so array_pop() yields document order.
        $pending = array_reverse(array_merge($this->getChildElementNodes(), $this->getReferenceNodes()));

        while (!empty($pending)) {
            $current = array_pop($pending);

            foreach ($memory as $memoryItem) {
                if ($current->isSameNode($memoryItem)) {
                    continue 2; // Already visited: skip the rest and continue the while loop
                }
            }

            $memory[] = $current;

            if (!$current->hasAttribute('itemscope')) {
                // Push children reversed so the first child is popped (and visited) next.
                foreach (array_reverse($current->getChildElementNodes()) as $child) {
                    $pending[] = $child;
                }
            }

            if ($current->hasAttribute('itemprop') && $current->hasPropertyNames()) {
                $results[] = $current;
            }
        }

        return $results;
    }

    /**
     * Tells whether the "itemprop" attribute contains at least one token.
     *
     * @return bool
     */
    public function hasPropertyNames() : bool
    {
        return !empty($this->tokenizeAttribute('itemprop'));
    }

    /**
     * Returns the distinct tokens of the "itemprop" attribute, in order of first appearance.
     *
     * Names are returned exactly as written; no vocabulary identifier is prepended.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-property-name
     *
     * @return array List of property names
     */
    public function getPropertyNames() : array
    {
        // array_unique() keeps the original keys; re-index so callers always get a list.
        return array_values(array_unique($this->tokenizeAttribute('itemprop')));
    }

    /**
     * Returns the property value of this element.
     *
     * Order of precedence: the element itself when it has "itemscope"; the "content" attribute;
     * the tag specific attribute (src/href/data resolved against the document URI, or
     * value/datetime verbatim) when that attribute is present; otherwise the text content.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-property-value for details of algorithm
     *
     * @return $this|string
     */
    public function getPropertyValue()
    {
        if ($this->hasAttribute('itemscope')) {
            return $this;
        }

        if ($this->hasAttribute('content')) {
            return $this->getAttribute('content');
        }

        $tagName = $this->tagName;

        if (isset(self::$uriAttributes[$tagName])) {
            $attributeName = self::$uriAttributes[$tagName];

            if ($this->hasAttribute($attributeName)) {
                $value = $this->getAttribute($attributeName);

                if ($this->isAbsoluteUri($value)) {
                    return $value;
                }

                $base = $this->ownerDocument !== null ? (string) $this->ownerDocument->documentURI : '';

                return $this->resolveUri($value, $base);
            }
        } elseif (isset(self::$plainAttributes[$tagName])) {
            $attributeName = self::$plainAttributes[$tagName];

            if ($this->hasAttribute($attributeName)) {
                return $this->getAttribute($attributeName);
            }
        }

        return $this->textContent;
    }

    /**
     * Checks whether a string starts with a URI scheme ("scheme:").
     * Note: only the scheme prefix is inspected, the rest of the string is not validated.
     *
     * @see \preg_match() for return values
     *
     * @param string $uri
     *
     * @return false|int
     */
    protected function isAbsoluteUri(string $uri)
    {
        // Scheme grammar from RFC 3986: a letter followed by letters, digits, "+", "-" or ".".
        return preg_match('/^[a-z][a-z0-9+.\-]*:/i', trim($uri));
    }

    /**
     * Filters out TextNodes etc. and returns child ElementNodes as array
     *
     * @return array Result array which contains child ElementNodes
     */
    protected function getChildElementNodes()
    {
        $childNodes = [];

        foreach ($this->childNodes as $childNode) {
            if ($childNode->nodeType === XML_ELEMENT_NODE) {
                $childNodes[] = $childNode;
            }
        }

        return $childNodes;
    }

    /**
     * Tokenizes value of given attribute
     *
     * @param string $attributeName Name of the attribute
     *
     * @return string[] Tokens of the attribute, empty when the attribute is missing or blank
     */
    public function tokenizeAttribute(string $attributeName)
    {
        if (!$this->hasAttribute($attributeName)) {
            return [];
        }

        return $this->tokenize($this->getAttribute($attributeName));
    }

    /**
     * Splits given attribute value on space characters (space, tab, LF, FF, CR).
     *
     * Empty tokens are dropped, so an empty or whitespace-only value yields an empty array.
     *
     * @see https://www.w3.org/TR/2018/WD-microdata-20180426/#dfn-split-a-string-on-spaces for definition of tokens
     *
     * @param string $attribute
     *
     * @return string[]
     */
    protected function tokenize(string $attribute)
    {
        $tokens = preg_split('/[ \t\n\f\r]+/', $attribute, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; callers iterate the result, so hand back a list.
        return $tokens === false ? [] : $tokens;
    }

    /**
     * Finds the nodes that this node references through the document
     *
     * For every token of the "itemref" attribute the first element of the document whose
     * "id" attribute equals the token is collected.
     *
     * @see https://www.w3.org/TR/microdata/#dfn-item-properties 4th step
     *
     * @return array
     */
    protected function getReferenceNodes(): array
    {
        $referenceNodes = [];
        $tokens = $this->tokenizeAttribute('itemref');

        if (empty($tokens)) {
            return $referenceNodes;
        }

        $document = $this->ownerDocument;

        // Reuse the XPath instance exposed by the owner document when there is one.
        $xpath = $document->xpath ?? null;

        if (!$xpath instanceof \DOMXPath) {
            $xpath = new \DOMXPath($document);
        }

        foreach ($tokens as $token) {
            $references = $xpath->query('//*[@id=' . $this->xpathLiteral($token) . ']');

            if ($references === false) {
                continue;
            }

            $first = $references->item(0);

            if ($first !== null) {
                $referenceNodes[] = $first;
            }
        }

        return $referenceNodes;
    }

    /**
     * Turns an arbitrary string into an XPath 1.0 string literal.
     *
     * XPath 1.0 has no escape sequences, so a value containing both quote characters
     * is expressed as a concat() of double-quoted parts and single-quoted '"' parts.
     *
     * @param string $value
     *
     * @return string
     */
    private function xpathLiteral(string $value): string
    {
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }

        return 'concat("' . implode('", \'"\', "', explode('"', $value)) . '")';
    }

    /**
     * Resolves a URI reference against a base URI (RFC 3986, section 5.2).
     *
     * When the base has no scheme it cannot serve as a base URI; in that case the base and
     * the reference are simply concatenated.
     *
     * @param string $uri  URI reference to resolve
     * @param string $base Base URI
     *
     * @return string
     */
    private function resolveUri(string $uri, string $base): string
    {
        $baseParts = $this->splitUri($base);

        if ($baseParts['scheme'] === '') {
            return $base . $uri;
        }

        $uri = trim($uri);
        $reference = $this->splitUri($uri);

        if ($reference['scheme'] !== '') {
            return $uri;
        }

        $authority = $baseParts['authority'];
        $query = $reference['query'];

        if ($reference['authority'] !== '') {
            // Protocol relative reference such as "//example.com/test.jpg"
            $authority = $reference['authority'];
            $path = $this->removeDotSegments($reference['path']);
        } elseif ($reference['path'] === '') {
            $path = $baseParts['path'];

            if ($query === '') {
                $query = $baseParts['query'];
            }
        } elseif ($reference['path'][0] === '/') {
            $path = $this->removeDotSegments($reference['path']);
        } else {
            if ($baseParts['authority'] !== '' && $baseParts['path'] === '') {
                $merged = '/' . $reference['path'];
            } else {
                // Replace everything after the last "/" of the base path with the reference path.
                $lastSlash = strrpos($baseParts['path'], '/');
                $directory = $lastSlash === false ? '' : substr($baseParts['path'], 0, $lastSlash + 1);
                $merged = $directory . $reference['path'];
            }

            $path = $this->removeDotSegments($merged);
        }

        return $baseParts['scheme'] . ':' . $authority . $path . $query . $reference['fragment'];
    }

    /**
     * Splits a URI reference into its components.
     *
     * "authority", "query" and "fragment" keep their leading delimiter ("//", "?", "#"),
     * which allows them to be concatenated back unchanged; missing components are empty strings.
     *
     * @param string $uri
     *
     * @return array<string, string> Keys: scheme, authority, path, query, fragment
     */
    private function splitUri(string $uri): array
    {
        preg_match('~^(?:([a-zA-Z][a-zA-Z0-9+.\-]*):)?(//[^/?#]*)?([^?#]*)(\?[^#]*)?(#.*)?\z~s', $uri, $matches);

        return [
            'scheme' => $matches[1] ?? '',
            'authority' => $matches[2] ?? '',
            'path' => $matches[3] ?? '',
            'query' => $matches[4] ?? '',
            'fragment' => $matches[5] ?? '',
        ];
    }

    /**
     * Removes "." and ".." segments from a path (RFC 3986, section 5.2.4).
     *
     * @param string $path
     *
     * @return string
     */
    private function removeDotSegments(string $path): string
    {
        $segments = explode('/', $path);
        $lastIndex = count($segments) - 1;
        $output = [];

        foreach ($segments as $index => $segment) {
            if ($segment === '.' || $segment === '..') {
                // Never pop the empty first segment that represents the leading "/".
                $canPop = count($output) > 1 || (count($output) === 1 && $output[0] !== '');

                if ($segment === '..' && $canPop) {
                    array_pop($output);
                }

                if ($index === $lastIndex) {
                    $output[] = ''; // A trailing dot segment leaves a trailing "/"
                }

                continue;
            }

            $output[] = $segment;
        }

        return implode('/', $output);
    }
}
230
|
|
|
|
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.