1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* ARC2 RDF/XML Parser |
4
|
|
|
* |
5
|
|
|
* @author Benjamin Nowack <[email protected]> |
6
|
|
|
* @license http://arc.semsol.org/license |
7
|
|
|
* @homepage <http://arc.semsol.org/> |
8
|
|
|
* @package ARC2 |
9
|
|
|
* @version 2009-12-03 |
10
|
|
|
*/ |
11
|
|
|
|
12
|
|
|
ARC2::inc('RDFParser'); |
13
|
|
|
|
14
|
|
|
class ARC2_RDFXMLParser extends ARC2_RDFParser { |
15
|
|
|
|
16
|
|
|
/**
 * Creates the RDF/XML parser by delegating to the parent parser's constructor.
 *
 * The former default value on $a was dropped: a default on a parameter that
 * precedes a required one can never be applied and is reported as deprecated
 * by PHP 8. Callers always had to pass both arguments, so nothing changes for them.
 *
 * @param mixed $a      Configuration, forwarded unchanged to the parent constructor.
 * @param mixed $caller Calling object, taken by reference and forwarded to the parent.
 */
function __construct($a, &$caller) {
  parent::__construct($a, $caller);
}
19
|
|
|
|
20
|
|
|
/**
 * Legacy (class-named) constructor alias; simply forwards to __construct().
 *
 * The former default value on $a was dropped: a default on a parameter that
 * precedes a required one can never be applied and is reported as deprecated
 * by PHP 8. Both arguments were always mandatory in practice.
 *
 * @param mixed $a      Configuration, forwarded unchanged.
 * @param mixed $caller Calling object, taken by reference and forwarded.
 */
function ARC2_RDFXMLParser($a, &$caller) {
  $this->__construct($a, $caller);
}
23
|
|
|
|
24
|
|
|
/**
 * Resets the parser to its initial state.
 *
 * Runs the parent initialisation first, then reads the 'encoding' setting from
 * the configuration array and clears the state machine, the node stack, the
 * document-level xml:lang / xml:base defaults and the namespace prefix map.
 */
function __init() {
  parent::__init();
  $this->encoding = $this->v('encoding', false, $this->a);
  /* state machine starts before the root element */
  $this->state = 0;
  /* document-level language and base defaults */
  $this->x_lang = '';
  $this->x_base = $this->base;
  /* well-known namespaces and their prefixes */
  $xml_ns = 'http://www.w3.org/XML/1998/namespace';
  $rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  $this->xml = $xml_ns;
  $this->rdf = $rdf_ns;
  $this->nsp = array($xml_ns => 'xml', $rdf_ns => 'rdf');
  /* empty node stack */
  $this->s_stack = array();
  $this->s_count = 0;
  /* filled in by parse() once the XML parser has finished */
  $this->target_encoding = '';
}
37
|
|
|
|
38
|
|
|
/* */ |
39
|
|
|
|
40
|
|
|
/**
 * Reads a document through the reader and feeds it chunk by chunk to the XML parser.
 *
 * If the XML extension reports an "Invalid character" error on the first attempt,
 * the parser is re-initialised and the whole document is parsed once more with an
 * ISO-8859-1 XML declaration forced onto the first chunk.
 *
 * @param string $path         Location handed to the reader's activate().
 * @param string $data         Optional data handed to the reader's activate().
 * @param bool   $iso_fallback True only on the internal ISO-8859-1 retry.
 * @return mixed Result of done() on success, or of addError() when parsing fails.
 */
function parse($path, $data = '', $iso_fallback = false) {
  /* reader */
  if (!$this->v('reader')) {
    ARC2::inc('Reader');
    /* plain assignment: the old "= & new" form is a parse error on PHP 7+ */
    $this->reader = new ARC2_Reader($this->a, $this);
  }
  $this->reader->setAcceptHeader('Accept: application/rdf+xml; q=0.9, */*; q=0.1');
  $this->reader->activate($path, $data);
  $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
  /* xml parser */
  $this->initXMLParser();
  /* a disabled set_time_limit() is an undefined function on PHP 8, which "@" cannot silence */
  $can_set_time_limit = function_exists('set_time_limit');
  /* parse */
  $first = true;
  while ($d = $this->reader->readStream()) {
    if (!$this->keep_time_limit && $can_set_time_limit) {
      /* still best-effort: a refusal by system policy must not surface as a warning */
      @set_time_limit($this->v('time_limit', 60, $this->a));
    }
    if ($iso_fallback && $first) {
      /* replace any existing XML declaration on the first chunk with an ISO-8859-1 one */
      $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
      $first = false;
    }
    if (!xml_parse($this->xml_parser, $d, false)) {
      $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
      $line = xml_get_current_line_number($this->xml_parser);
      $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
      if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
        /* tear everything down and retry once as ISO-8859-1 */
        xml_parser_free($this->xml_parser);
        unset($this->xml_parser);
        $this->reader->closeStream();
        $this->__init();
        $this->encoding = 'ISO-8859-1';
        unset($this->xml_parser);
        unset($this->reader);
        return $this->parse($path, $data, true);
      }
      else {
        return $this->addError($this->tmp_error);
      }
    }
  }
  /* remember the encoding the parser produced before releasing it */
  $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
  xml_parser_free($this->xml_parser);
  $this->reader->closeStream();
  unset($this->reader);
  return $this->done();
}
84
|
|
|
|
85
|
|
|
/* */ |
86
|
|
|
|
87
|
|
|
/**
 * Creates the namespace-aware XML parser on first use and registers this
 * object's open/close/cdata/nsDecl methods as its handlers.
 *
 * Handlers are registered as array($this, 'method') callables rather than via
 * xml_set_object() plus bare method names, which is deprecated as of PHP 8.4.
 * Does nothing when a parser is already set on this object.
 */
function initXMLParser() {
  if (isset($this->xml_parser)) {
    return;
  }
  /* only the three encodings named in the pattern are passed on; anything else becomes UTF-8 */
  $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
  /* empty separator: element and attribute names arrive as namespace URI directly followed by local name */
  $parser = xml_parser_create_ns($enc, '');
  xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
  xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
  xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
  xml_set_character_data_handler($parser, array($this, 'cdata'));
  xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
  $this->xml_parser = $parser;
}
100
|
|
|
|
101
|
|
|
/* */ |
102
|
|
|
|
103
|
|
|
/**
 * Returns the encoding from the requested source.
 *
 * @param string $src 'parser' yields the target encoding recorded by parse();
 *                    'config' yields the configured encoding when one is set.
 *                    In every other case the reader is asked.
 * @return mixed The selected encoding value.
 */
function getEncoding($src = 'config') {
  if ($src == 'parser') {
    return $this->target_encoding;
  }
  $use_configured = ($src == 'config') && $this->encoding;
  return $use_configured ? $this->encoding : $this->reader->getEncoding();
}
112
|
|
|
|
113
|
|
|
/* */ |
114
|
|
|
|
115
|
|
|
/**
 * Returns the 'triples' value obtained through v(), with an empty array as the fallback argument.
 *
 * @return mixed Whatever v('triples', array()) yields.
 */
function getTriples() {
  $triples = $this->v('triples', array());
  return $triples;
}
118
|
|
|
|
119
|
|
|
/**
 * Returns the running triple counter maintained by addT().
 *
 * @return mixed Current value of t_count.
 */
function countTriples() {
  $total = $this->t_count;
  return $total;
}
122
|
|
|
|
123
|
|
|
/* */ |
124
|
|
|
|
125
|
|
|
/**
 * Pushes a frame onto the node stack.
 *
 * The frame is taken by reference so the caller's copy also receives its
 * stack position in the 'pos' key (updateS() relies on that key).
 *
 * @param array $s Frame to push; gains a 'pos' entry.
 */
function pushS(&$s) {
  $pos = $this->s_count;
  $s['pos'] = $pos;
  $this->s_stack[$pos] = $s;
  $this->s_count = $pos + 1;
}
130
|
|
|
|
131
|
|
|
/**
 * Removes the top frame from the node stack.
 *
 * Popping an empty stack is now a no-op; previously the counter went negative,
 * which made getParentS() treat the stack as non-empty afterwards. The top slot
 * is dropped directly instead of copying every remaining frame into a new array
 * on each pop.
 */
function popS() {
  if ($this->s_count < 1) {
    /* nothing to pop: keep counter and stack in a consistent empty state */
    $this->s_count = 0;
    $this->s_stack = array();
    return;
  }
  $this->s_count--;
  /* pushS() always writes at index s_count, so the top frame lives exactly here */
  unset($this->s_stack[$this->s_count]);
}
139
|
|
|
|
140
|
|
|
/**
 * Writes a (modified) frame back to the stack slot recorded in its 'pos' key.
 *
 * @param array $s Frame previously obtained from the stack; must carry 'pos'.
 */
function updateS($s) {
  $pos = $s['pos'];
  $this->s_stack[$pos] = $s;
}
143
|
|
|
|
144
|
|
|
/**
 * Returns a copy of the frame on top of the node stack.
 *
 * @return array|false The top frame, or false when the counter is zero or the slot is missing.
 */
function getParentS() {
  if (!$this->s_count) {
    return false;
  }
  $top = $this->s_count - 1;
  if (isset($this->s_stack[$top])) {
    return $this->s_stack[$top];
  }
  return false;
}
147
|
|
|
|
148
|
|
|
/**
 * Returns the base URI in effect for the frame on top of the stack.
 *
 * A non-empty property-level base ('p_x_base') wins over the frame's own
 * 'x_base'; with no frame on the stack the document-level base is returned.
 *
 * @return mixed The base in scope, or '' when the frame carries none.
 */
function getParentXBase() {
  $p = $this->getParentS();
  if (!$p) {
    return $this->x_base;
  }
  if (isset($p['p_x_base']) && $p['p_x_base']) {
    return $p['p_x_base'];
  }
  return isset($p['x_base']) ? $p['x_base'] : '';
}
154
|
|
|
|
155
|
|
|
/**
 * Returns the language in effect for the frame on top of the stack.
 *
 * A non-empty property-level language ('p_x_lang') wins over the frame's own
 * 'x_lang'; with no frame on the stack the document-level language is returned.
 *
 * @return mixed The language in scope, or '' when the frame carries none.
 */
function getParentXLang() {
  $p = $this->getParentS();
  if (!$p) {
    return $this->x_lang;
  }
  if (isset($p['p_x_lang']) && $p['p_x_lang']) {
    return $p['p_x_lang'];
  }
  return isset($p['x_lang']) ? $p['x_lang'] : '';
}
161
|
|
|
|
162
|
|
|
/* */ |
163
|
|
|
|
164
|
|
|
/**
 * Appends one triple to the result list.
 *
 * When skip_dupes is on, the triple is fingerprinted (md5 of its serialized
 * form) and silently dropped if that fingerprint was recorded before.
 *
 * @param mixed  $s      Subject value.
 * @param mixed  $p      Predicate value.
 * @param mixed  $o      Object value.
 * @param string $s_type Subject type as used by the callers in this class ('uri' or 'bnode').
 * @param string $o_type Object type as used by the callers in this class ('uri', 'bnode' or 'literal').
 * @param string $o_dt   Object datatype, '' when none.
 * @param string $o_lang Object language, '' when none.
 */
function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
  if ($this->skip_dupes) {
    $fingerprint = md5(serialize($t));
    if (isset($this->added_triples[$fingerprint])) {
      /* already recorded */
      return;
    }
    $this->added_triples[$fingerprint] = true;
  }
  $this->triples[$this->t_count] = $t;
  $this->t_count++;
}
180
|
|
|
|
181
|
|
|
/**
 * Adds the four reification triples (rdf:type rdf:Statement, rdf:subject,
 * rdf:predicate, rdf:object) describing the statement ($s, $p, $o).
 *
 * @param mixed  $t      URI identifying the reified statement.
 * @param mixed  $s      Subject of the reified statement.
 * @param mixed  $p      Predicate of the reified statement.
 * @param mixed  $o      Object of the reified statement.
 * @param string $s_type Type of $s.
 * @param string $o_type Type of $o.
 * @param string $o_dt   Datatype of $o, '' when none.
 * @param string $o_lang Language of $o, '' when none.
 */
function reify($t, $s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  $rdf = $this->rdf;
  $this->addT($t, $rdf . 'type', $rdf . 'Statement', 'uri', 'uri');
  $this->addT($t, $rdf . 'subject', $s, 'uri', $s_type);
  $this->addT($t, $rdf . 'predicate', $p, 'uri', 'uri');
  $this->addT($t, $rdf . 'object', $o, 'uri', $o_type, $o_dt, $o_lang);
}
187
|
|
|
|
188
|
|
|
/* */ |
189
|
|
|
|
190
|
|
|
/**
 * Start-element handler: dispatches to the h<state>Open method for the current state.
 *
 * States 0, 1, 2, 4, 5 and 6 have an open handler; any other state is recorded as an error.
 *
 * @param mixed  $p Parser handle supplied by the XML extension (unused).
 * @param string $t Element name.
 * @param array  $a Element attributes.
 * @return mixed Result of the state handler; nothing when the state has no handler.
 */
function open($p, $t, $a) {
  /* loose comparison, tried in this order, exactly like a switch over the state */
  foreach (array(0, 1, 2, 4, 5, 6) as $handled_state) {
    if ($this->state == $handled_state) {
      $handler = 'h' . $handled_state . 'Open';
      return $this->$handler($t, $a);
    }
  }
  $this->addError('open() called at state ' . $this->state . ' in '.$t);
}
203
|
|
|
|
204
|
|
|
/**
 * End-element handler: dispatches to the h<state>Close method for the current state.
 *
 * States 1 to 6 have a close handler; any other state is recorded as an error.
 *
 * @param mixed  $p Parser handle supplied by the XML extension (unused).
 * @param string $t Element name.
 * @return mixed Result of the state handler; nothing when the state has no handler.
 */
function close($p, $t) {
  /* loose comparison, tried in this order, exactly like a switch over the state */
  foreach (array(1, 2, 3, 4, 5, 6) as $handled_state) {
    if ($this->state == $handled_state) {
      $handler = 'h' . $handled_state . 'Close';
      return $this->$handler($t);
    }
  }
  $this->addError('close() called at state ' . $this->state . ' in '.$t);
}
217
|
|
|
|
218
|
|
|
/**
 * Character-data handler: only states 4 and 6 collect text; everything else is ignored.
 *
 * @param mixed  $p Parser handle supplied by the XML extension (unused).
 * @param string $d Character data chunk.
 * @return mixed Result of the state handler, or false when the state ignores text.
 */
function cdata($p, $d) {
  /* loose comparison, tried in this order, exactly like a switch over the state */
  foreach (array(4, 6) as $handled_state) {
    if ($this->state == $handled_state) {
      $handler = 'h' . $handled_state . 'Cdata';
      return $this->$handler($d);
    }
  }
  return false;
}
227
|
|
|
|
228
|
|
|
/**
 * Namespace-declaration handler: remembers the first prefix seen for a namespace URI.
 *
 * @param mixed $p   Parser handle supplied by the XML extension (unused).
 * @param mixed $prf Declared prefix.
 * @param mixed $uri Namespace URI.
 */
function nsDecl($p, $prf, $uri) {
  if (!isset($this->nsp[$uri])) {
    /* first declaration wins; later prefixes for the same URI are ignored */
    $this->nsp[$uri] = $prf;
  }
}
231
|
|
|
|
232
|
|
|
/* */ |
233
|
|
|
|
234
|
|
|
/**
 * State 0 open handler (first element of the document).
 *
 * Picks up xml:lang / xml:base from the element (falling back to the current
 * document-level values), moves to state 1 and, unless the element is rdf:RDF,
 * treats it as a node element right away.
 *
 * @param string $t Element name.
 * @param array  $a Element attributes.
 */
function h0Open($t, $a) {
  $lang = $this->v($this->xml.'lang', $this->x_lang, $a);
  $this->x_lang = $lang;
  $base = $this->v($this->xml.'base', $this->x_base, $a);
  $this->x_base = $this->calcURI($base);
  $this->state = 1;
  if ($t === $this->rdf.'RDF') {
    return;
  }
  /* no rdf:RDF wrapper: the root element itself is a node */
  $this->h1Open($t, $a);
}
242
|
|
|
|
243
|
|
|
/* */ |
244
|
|
|
|
245
|
|
|
/**
 * Open handler for a node element (also reached from states 4 and 5 via h4Open/h5Open).
 *
 * Builds a subject frame from rdf:ID / rdf:about / rdf:nodeID (or a fresh bnode),
 * links it to the enclosing property when called in state 4 (including
 * collection cells), emits rdf:type and property-attribute triples, pushes the
 * frame and moves to state 2.
 *
 * @param string $t Element name.
 * @param array  $a Element attributes.
 */
function h1Open($t, $a) {
  $xml = $this->xml;
  $rdf = $this->rdf;
  /* base and language: own attribute if present, otherwise inherited from the stack */
  $s = array(
    'x_base' => isset($a[$xml.'base']) ? $this->calcURI($a[$xml.'base']) : $this->getParentXBase(),
    'x_lang' => isset($a[$xml.'lang']) ? $a[$xml.'lang'] : $this->getParentXLang(),
    'li_count' => 0,
  );
  /* subject: ID, then about, then named bnode, then generated bnode */
  if (isset($a[$rdf.'ID'])) {
    $s['type'] = 'uri';
    $s['value'] = $this->calcURI('#'.$a[$rdf.'ID'], $s['x_base']);
  }
  elseif (isset($a[$rdf.'about'])) {
    $s['type'] = 'uri';
    $s['value'] = $this->calcURI($a[$rdf.'about'], $s['x_base']);
  }
  elseif (isset($a[$rdf.'nodeID'])) {
    $s['type'] = 'bnode';
    $s['value'] = '_:'.$a[$rdf.'nodeID'];
  }
  else {
    $s['type'] = 'bnode';
    $s['value'] = $this->createBnodeID();
  }
  /* sub-node: connect to the frame that is currently on top of the stack */
  if ($this->state === 4) {
    $sup_s = $this->getParentS();
    $starts_coll = isset($sup_s['o_is_coll']) && $sup_s['o_is_coll'];
    $extends_coll = !$starts_coll && isset($sup_s['is_coll']) && $sup_s['is_coll'];
    if ($starts_coll || $extends_coll) {
      /* first cell hangs off the property, later cells off the previous cell via rdf:rest */
      $coll = array('value' => $this->createBnodeID(), 'type' => 'bnode', 'is_coll' => true, 'x_base' => $s['x_base'], 'x_lang' => $s['x_lang']);
      $link = $starts_coll ? $sup_s['p'] : $rdf . 'rest';
      $this->addT($sup_s['value'], $link, $coll['value'], $sup_s['type'], $coll['type']);
      $this->addT($coll['value'], $rdf . 'first', $s['value'], $coll['type'], $s['type']);
      $this->pushS($coll);
    }
    elseif (isset($sup_s['p']) && $sup_s['p']) {
      /* normal sub-node */
      $this->addT($sup_s['value'], $sup_s['p'], $s['value'], $sup_s['type'], $s['type']);
    }
  }
  /* typed node: any element other than rdf:Description names the type */
  if ($t !== $rdf.'Description') {
    $this->addT($s['value'], $rdf.'type', $t, $s['type'], 'uri');
  }
  /* (additional) typing attr */
  if (isset($a[$rdf.'type'])) {
    $this->addT($s['value'], $rdf.'type', $a[$rdf.'type'], $s['type'], 'uri');
  }
  /* Seq|Bag|Alt */
  if (in_array($t, array($rdf.'Seq', $rdf.'Bag', $rdf.'Alt'))) {
    $s['is_con'] = true;
  }
  /* remaining attributes become literal triples; rdf/xml ones are skipped unless they match the pattern below */
  foreach ($a as $k => $v) {
    $is_foreign = (strpos($k, $xml) === false) && (strpos($k, $rdf) === false);
    if (!$is_foreign && !preg_match('/(\_[0-9]+|value|Seq|Bag|Alt|Statement|Property|List)$/', $k)) {
      continue;
    }
    if (!strpos($k, ':')) {
      continue;
    }
    $this->addT($s['value'], $k, $v, $s['type'], 'literal', '', $s['x_lang']);
  }
  $this->pushS($s);
  $this->state = 2;
}
316
|
|
|
|
317
|
|
|
/* */ |
318
|
|
|
|
319
|
|
|
/**
 * State 2 open handler: a property element inside the node on top of the stack.
 *
 * Records the predicate (rdf:li is rewritten to rdf:_n), property-level
 * xml:base / xml:lang and an optional reification ID on the frame, then handles
 * the object according to rdf:resource, rdf:nodeID, rdf:parseType or, failing
 * those, prepares for a nested node or literal. Remaining attributes become
 * literal properties of the object. The frame is written back at the end.
 *
 * Every reification now clears 'p_id' afterwards. The rdf:nodeID branch used to
 * leave it set, so an element carrying rdf:ID, rdf:nodeID and property
 * attributes was reified twice.
 *
 * @param string $t Element name.
 * @param array  $a Element attributes.
 */
function h2Open($t, $a) {
  $s = $this->getParentS();
  /* forget per-property leftovers from the previous property of this node */
  foreach (array('p_x_base', 'p_x_lang', 'p_id', 'o_is_coll') as $k) {
    unset($s[$k]);
  }
  /* base */
  if (isset($a[$this->xml.'base'])) {
    $s['p_x_base'] = $this->calcURI($a[$this->xml.'base'], $s['x_base']);
  }
  $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : $s['x_base'];
  /* lang */
  if (isset($a[$this->xml.'lang'])) {
    $s['p_x_lang'] = $a[$this->xml.'lang'];
  }
  $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : $s['x_lang'];
  /* adjust li */
  if ($t === $this->rdf.'li') {
    $s['li_count']++;
    $t = $this->rdf.'_'.$s['li_count'];
  }
  /* set p */
  $s['p'] = $t;
  /* reification */
  if (isset($a[$this->rdf.'ID'])) {
    $s['p_id'] = $a[$this->rdf.'ID'];
  }
  $o = array('value' => '', 'type' => '', 'x_base' => $b, 'x_lang' => $l);
  /* resource/rdf:resource: an unqualified "resource" attribute is treated like rdf:resource */
  if (isset($a['resource'])) {
    $a[$this->rdf . 'resource'] = $a['resource'];
    unset($a['resource']);
  }
  if (isset($a[$this->rdf.'resource'])) {
    $o['value'] = $this->calcURI($a[$this->rdf.'resource'], $b);
    $o['type'] = 'uri';
    $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
    /* type */
    if (isset($a[$this->rdf.'type'])) {
      $this->addT($o['value'], $this->rdf.'type', $a[$this->rdf.'type'], 'uri', 'uri');
    }
    /* reification */
    if (isset($s['p_id'])) {
      $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
      unset($s['p_id']);
    }
    $this->state = 3;
  }
  /* named bnode */
  elseif (isset($a[$this->rdf.'nodeID'])) {
    $o['value'] = '_:' . $a[$this->rdf.'nodeID'];
    $o['type'] = 'bnode';
    $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
    $this->state = 3;
    /* reification */
    if (isset($s['p_id'])) {
      $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
      /* cleared like in the other branches so the attribute loop below cannot reify again */
      unset($s['p_id']);
    }
  }
  /* parseType */
  elseif (isset($a[$this->rdf.'parseType'])) {
    if ($a[$this->rdf.'parseType'] === 'Literal') {
      /* start collecting an XML literal (state 6) */
      $s['o_xml_level'] = 0;
      $s['o_xml_data'] = '';
      $s['p_xml_literal_level'] = 0;
      $s['ns'] = array();
      $this->state = 6;
    }
    elseif ($a[$this->rdf.'parseType'] === 'Resource') {
      /* implicit bnode object; its frame is marked as having no closing tag of its own */
      $o['value'] = $this->createBnodeID();
      $o['type'] = 'bnode';
      $o['has_closing_tag'] = 0;
      $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
      $this->pushS($o);
      /* reification */
      if (isset($s['p_id'])) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
        unset($s['p_id']);
      }
      $this->state = 2;
    }
    elseif ($a[$this->rdf.'parseType'] === 'Collection') {
      $s['o_is_coll'] = true;
      $this->state = 4;
    }
  }
  /* sub-node or literal */
  else {
    $s['o_cdata'] = '';
    if (isset($a[$this->rdf.'datatype'])) {
      $s['o_datatype'] = $a[$this->rdf.'datatype'];
    }
    $this->state = 4;
  }
  /* any other attrs (skip rdf and xml, except names ending in _n or value) */
  foreach($a as $k => $v) {
    if (((strpos($k, $this->xml) === false) && (strpos($k, $this->rdf) === false)) || preg_match('/(\_[0-9]+|value)$/', $k)) {
      if (strpos($k, ':')) {
        if (!$o['value']) {
          /* no object yet: the attributes describe an implicit bnode */
          $o['value'] = $this->createBnodeID();
          $o['type'] = 'bnode';
          $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
        }
        /* reification */
        if (isset($s['p_id'])) {
          $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
          unset($s['p_id']);
        }
        $this->addT($o['value'], $k, $v, $o['type'], 'literal');
        $this->state = 3;
      }
    }
  }
  $this->updateS($s);
}
433
|
|
|
|
434
|
|
|
/* */ |
435
|
|
|
|
436
|
|
|
/**
 * State 4 open handler: an element opening inside a property is handled as a node element.
 *
 * @param string $t Element name.
 * @param array  $a Element attributes.
 * @return mixed Result of h1Open().
 */
function h4Open($t, $a) {
  $result = $this->h1Open($t, $a);
  return $result;
}
439
|
|
|
|
440
|
|
|
/* */ |
441
|
|
|
|
442
|
|
|
/**
 * State 5 open handler: switches to state 4 and handles the element as a node element.
 *
 * @param string $t Element name.
 * @param array  $a Element attributes.
 * @return mixed Result of h4Open().
 */
function h5Open($t, $a) {
  /* h1Open() checks for state 4 to decide whether to link the node to its parent */
  $this->state = 4;
  $result = $this->h4Open($t, $a);
  return $result;
}
446
|
|
|
|
447
|
|
|
/* */ |
448
|
|
|
|
449
|
|
|
/**
 * State 6 open handler: serialises a start tag into the XML literal being collected.
 *
 * The element and attribute names are turned back into prefixed names using the
 * prefix map filled by nsDecl(); a namespace declaration is emitted the first
 * time an element's prefix/URI pair is used inside the literal.
 *
 * Attribute values and namespace URIs are now escaped. The XML parser hands them
 * over decoded, so writing them back verbatim produced malformed markup whenever
 * they contained &, <, > or a double quote. Lookups of unknown namespaces no
 * longer read an undefined array index.
 *
 * @param string $t Element name.
 * @param array  $a Element attributes.
 */
function h6Open($t, $a) {
  /* byte-wise escaping, independent of the parser's target encoding */
  $esc_from = array('&', '<', '>', '"');
  $esc_to = array('&amp;', '&lt;', '&gt;', '&quot;');
  $s = $this->getParentS();
  $data = isset($s['o_xml_data']) ? $s['o_xml_data'] : '';
  $ns = isset($s['ns']) ? $s['ns'] : array();
  $parts = $this->splitURI($t);
  if (count($parts) === 1) {
    $data .= '<'.$t;
  }
  else {
    $ns_uri = $parts[0];
    $name = $parts[1];
    if (!isset($this->nsp[$ns_uri])) {
      /* split point did not match a declared namespace: try every known namespace as a prefix of the name */
      foreach ($this->nsp as $tmp1 => $tmp2) {
        if (strpos($t, $tmp1) === 0) {
          $ns_uri = $tmp1;
          $name = substr($t, strlen($tmp1));
          break;
        }
      }
    }
    $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
    $data .= $nsp ? '<' . $nsp . ':' . $name : '<' . $name;
    /* ns */
    if (!isset($ns[$nsp.'='.$ns_uri]) || !$ns[$nsp.'='.$ns_uri]) {
      $ns_uri_attr = str_replace($esc_from, $esc_to, $ns_uri);
      $data .= $nsp ? ' xmlns:'.$nsp.'="'.$ns_uri_attr.'"' : ' xmlns="'.$ns_uri_attr.'"';
      $ns[$nsp.'='.$ns_uri] = true;
      $s['ns'] = $ns;
    }
  }
  foreach ($a as $k => $v) {
    $v = str_replace($esc_from, $esc_to, $v);
    $parts = $this->splitURI($k);
    if (count($parts) === 1) {
      $data .= ' '.$k.'="'.$v.'"';
    }
    else {
      $ns_uri = $parts[0];
      $name = $parts[1];
      $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
      $data .= $nsp ? ' '.$nsp.':'.$name.'="'.$v.'"' : ' '.$name.'="'.$v.'"' ;
    }
  }
  $data .= '>';
  $s['o_xml_data'] = $data;
  $s['o_xml_level'] = isset($s['o_xml_level']) ? $s['o_xml_level'] + 1 : 1;
  if ($t == $s['p']) {/* xml container prop */
    $s['p_xml_literal_level'] = isset($s['p_xml_literal_level']) ? $s['p_xml_literal_level'] + 1 : 1;
  }
  $this->updateS($s);
}
498
|
|
|
|
499
|
|
|
/* */ |
500
|
|
|
|
501
|
|
|
/**
 * State 1 close handler (end of doc): returns the state machine to state 0.
 *
 * @param string $t Element name (unused).
 */
function h1Close($t) {
  $initial_state = 0;
  $this->state = $initial_state;
}
504
|
|
|
|
505
|
|
|
/* */ |
506
|
|
|
|
507
|
|
|
/**
 * State 2 close handler: a close tag arrives where a property was expected, so
 * the node on top of the stack is finished.
 *
 * Pops that frame and picks the next state from the frame underneath:
 * - nothing underneath: stay in state 5;
 * - the popped frame was flagged has_closing_tag = 0 (set for parseType="Resource"
 *   objects): state 2;
 * - the frame underneath has no pending predicate: state 4 (or 1 if the counter is zero);
 * - otherwise: state 5.
 *
 * @param string $t Element name (unused).
 */
function h2Close($t) {
  $s = $this->getParentS();
  if (!$s) {
    return;
  }
  $no_closing_tag = isset($s['has_closing_tag']) && !$s['has_closing_tag'];
  $this->popS();
  $this->state = 5;
  $parent = $this->getParentS();/* new s */
  if (!$parent) {
    return;
  }
  if ($no_closing_tag) {
    $this->state = 2;
  }
  elseif (!isset($parent['p']) || !$parent['p']) {/* p close after collection|parseType=Resource|node close after p close */
    $this->state = $this->s_count ? 4 : 1;
  }
}
525
|
|
|
|
526
|
|
|
/* */ |
527
|
|
|
|
528
|
|
|
/**
 * State 3 close handler (p close): the property is done, back to state 2.
 *
 * @param string $t Element name (unused).
 */
function h3Close($t) {
  $expecting_property = 2;
  $this->state = $expecting_property;
}
531
|
|
|
|
532
|
|
|
/* */ |
533
|
|
|
|
534
|
|
|
/**
 * State 4 close handler: empty property, property close after character data,
 * or property close after a collection.
 *
 * Collection case: terminates the list with rdf:nil, unwinds the collection
 * cells from the stack down to the property's frame and reifies if requested.
 * Otherwise: emits the collected character data as a literal (with datatype and
 * language), reifies if requested and clears the per-property keys.
 *
 * Fixes over the previous version:
 * - the unwind loop stops when the stack runs out instead of spinning forever;
 *   it also recognises a closing rdf:li, whose predicate is stored as rdf:_n;
 * - an empty parseType="Collection" property yields rdf:nil rather than a literal;
 * - missing character data is treated as '' instead of reading an undefined key.
 *
 * @param string $t Element name of the closing property.
 */
function h4Close($t) {
  $s = $this->getParentS();
  if (!$s) {
    return;
  }
  $rdf = $this->rdf;
  $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : (isset($s['x_base']) ? $s['x_base'] : '');
  if (isset($s['is_coll']) && $s['is_coll']) {
    $this->addT($s['value'], $rdf . 'rest', $rdf . 'nil', $s['type'], 'uri');
    /* back to collection start */
    $closes_li = ($t === $rdf . 'li');
    $li_pattern = '/^' . preg_quote($rdf, '/') . '_[0-9]+$/';
    $sub_s = $s;
    while ($s) {
      if (isset($s['p'])) {
        /* h2Open() stores rdf:li as rdf:_n, so a closing rdf:li matches any rdf:_n predicate */
        if (($s['p'] == $t) || ($closes_li && preg_match($li_pattern, $s['p']))) {
          break;
        }
      }
      $sub_s = $s;/* last frame popped is the first cell, i.e. the property's object */
      $this->popS();
      $s = $this->getParentS();
    }
    if ($s) {
      /* reification */
      if (isset($s['p_id']) && $s['p_id']) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $sub_s['value'], $s['type'], $sub_s['type']);
      }
      unset($s['p']);
      $this->updateS($s);
    }
  }
  else {
    if (isset($s['o_is_coll']) && $s['o_is_coll']) {
      /* collection without members: the object is the empty list */
      $dt = '';
      $l = '';
      $o = array('type' => 'uri', 'value' => $rdf . 'nil');
      unset($s['o_is_coll']);
    }
    else {
      $dt = isset($s['o_datatype']) ? $s['o_datatype'] : '';
      $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : (isset($s['x_lang']) ? $s['x_lang'] : '');
      $o = array('type' => 'literal', 'value' => isset($s['o_cdata']) ? $s['o_cdata'] : '');
    }
    $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type'], $dt, $l);
    /* reification */
    if (isset($s['p_id']) && $s['p_id']) {
      $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type'], $dt, $l);
    }
    unset($s['o_cdata']);
    unset($s['o_datatype']);
    unset($s['p']);
    $this->updateS($s);
  }
  $this->state = 2;
}
569
|
|
|
|
570
|
|
|
/* */ |
571
|
|
|
|
572
|
|
|
/**
 * State 5 close handler (p close): clears the pending predicate on the top
 * frame and returns to state 2. Does nothing when the stack is empty.
 *
 * @param string $t Element name (unused).
 */
function h5Close($t) {
  $s = $this->getParentS();
  if (!$s) {
    return;
  }
  unset($s['p']);
  $this->updateS($s);
  $this->state = 2;
}
579
|
|
|
|
580
|
|
|
/* */ |
581
|
|
|
|
582
|
|
|
/**
 * State 6 close handler: either ends the XML literal or appends an end tag to it.
 *
 * At nesting level 0 the close tag belongs to the property itself: the collected
 * markup (trimmed of spaces) is emitted as an rdf:XMLLiteral and the state
 * returns to 2. At any deeper level the end tag is serialised with the prefix
 * known for its namespace and the nesting level is decreased.
 *
 * @param string $t Element name.
 */
function h6Close($t) {
  $s = $this->getParentS();
  if (!$s) {
    return;
  }
  /* property-level language wins over the node's language */
  $l = '';
  if (isset($s['p_x_lang']) && $s['p_x_lang']) {
    $l = $s['p_x_lang'];
  }
  elseif (isset($s['x_lang'])) {
    $l = $s['x_lang'];
  }
  $data = $s['o_xml_data'];
  $level = $s['o_xml_level'];
  if ($level === 0) {/* pClose */
    $this->addT($s['value'], $s['p'], trim($data, ' '), $s['type'], 'literal', $this->rdf.'XMLLiteral', $l);
    unset($s['o_xml_data']);
    $this->state = 2;
    $this->updateS($s);
    return;
  }
  /* nested element: work out the tag name to write */
  $parts = $this->splitURI($t);
  if (count($parts) == 1) {
    $tag = $t;
  }
  else {
    $ns_uri = $parts[0];
    $name = $parts[1];
    if (!isset($this->nsp[$ns_uri])) {
      /* split point did not match a declared namespace: try every known namespace as a prefix of the name */
      foreach ($this->nsp as $known_uri => $unused_prefix) {
        if (strpos($t, $known_uri) === 0) {
          $ns_uri = $known_uri;
          $name = substr($t, strlen($known_uri));
          break;
        }
      }
    }
    $nsp = $this->nsp[$ns_uri];
    $tag = $nsp ? $nsp.':'.$name : $name;
  }
  $s['o_xml_data'] = $data . '</' . $tag . '>';
  $s['o_xml_level'] = $level - 1;
  if ($t == $s['p']) {/* xml container prop */
    $s['p_xml_literal_level']--;
  }
  $this->updateS($s);
}
621
|
|
|
|
622
|
|
|
/* */ |
623
|
|
|
|
624
|
|
|
/**
 * State 4 character-data handler: appends the chunk to the literal text
 * collected on the top frame. Does nothing when the stack is empty.
 *
 * @param string $d Character data chunk.
 */
function h4Cdata($d) {
  $s = $this->getParentS();
  if (!$s) {
    return;
  }
  if (isset($s['o_cdata'])) {
    $s['o_cdata'] .= $d;
  }
  else {
    $s['o_cdata'] = $d;
  }
  $this->updateS($s);
}
630
|
|
|
|
631
|
|
|
/* */ |
632
|
|
|
|
633
|
|
|
/**
 * State 6 character-data handler: appends escaped text to the XML literal
 * collected on the top frame. Does nothing when the stack is empty.
 *
 * The three markup characters &, < and > are escaped byte-wise. The previous
 * htmlspecialchars($d, ENT_NOQUOTES) call produces the same result for valid
 * UTF-8, but returns an empty string for input that is not valid in its default
 * charset, which silently dropped text whenever the parser's target encoding
 * was ISO-8859-1.
 *
 * @param string $d Character data chunk.
 */
function h6Cdata($d) {
  if ($s = $this->getParentS()) {
    if (isset($s['o_xml_data']) || preg_match("/[\n\r]/", $d) || trim($d)) {
      /* "&" first so the entities written for "<" and ">" are not escaped again */
      $d = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $d);
      $s['o_xml_data'] = isset($s['o_xml_data']) ? $s['o_xml_data'] . $d : $d;
    }
    $this->updateS($s);
  }
}
642
|
|
|
|
643
|
|
|
/* */ |
644
|
|
|
|
645
|
|
|
} |
646
|
|
|
|
If you suppress an error, we recommend checking for the error condition explicitly: