Completed
Push — master ( 264630...c90943 )
by mw
230:08 queued 195:34
created

includes/export/SMW_Serializer.php (3 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php

/**
 * File holding the SMWSerializer class that provides basic functions for
 * serialising data in OWL and RDF syntaxes.
 *
 * @ingroup SMW
 *
 * @author Markus Krötzsch
 */

// Declaration-type flags. Each value occupies its own bit so that several
// flags can be combined into (and tested within) a single integer.
define( 'SMW_SERIALIZER_DECL_CLASS', 1 ); // declared as owl:Class
define( 'SMW_SERIALIZER_DECL_OPROP', 2 ); // declared as owl:ObjectProperty
define( 'SMW_SERIALIZER_DECL_APROP', 4 ); // declared as owl:DatatypeProperty

/**
 * Abstract class for serializing exported data (encoded as SMWExpData object)
 * in a concrete syntactic format such as Turtle or RDF/XML. The serializer
 * adds object serialisations to an internal string that can be retrieved for
 * pushing it to an output. This abstract class does not define this string as
 * implementations may want to use their own scheme (e.g. using two buffers as
 * in the case of SMWRDFXMLSerializer). The function flushContent() returns the
 * string serialized so far so as to enable incremental serialization.
 *
 * RDF and OWL have two types of dependencies that are managed by this class:
 * namespaces (and similar abbreviation schemes) and element declarations.
 * The former need to be defined before being used, while the latter can occur
 * at some later point in the serialization. Declarations are relevant to the
 * OWL data model, being one of Class, DatatypeProperty, and ObjectProperty
 * (only the latter two are mutually exclusive). This class determines the
 * required declaration from the context in which an element is used.
 *
 * @ingroup SMW
 */
abstract class SMWSerializer {
	/**
	 * The current working string is obtained by concatenating the strings
	 * $pre_ns_buffer and $post_ns_buffer. The split between the two is such
	 * that one can append additional namespace declarations to $pre_ns_buffer
	 * so that they affect all current elements. The buffers are flushed during
	 * output in order to achieve "streaming" RDF export for larger files.
	 * @var string
	 */
	protected $pre_ns_buffer;
	/**
	 * See documentation for $pre_ns_buffer.
	 * @var string
	 */
	protected $post_ns_buffer;
	/**
	 * Array for recording required declarations; format:
	 * resourcename => decl-flag, where decl-flag is a sum of flags
	 * SMW_SERIALIZER_DECL_CLASS, SMW_SERIALIZER_DECL_OPROP,
	 * SMW_SERIALIZER_DECL_APROP.
	 * @var array of integer
	 */
	protected $decl_todo;
	/**
	 * Array for recording previous declarations; format:
	 * resourcename => decl-flag, where decl-flag is a sum of flags
	 * SMW_SERIALIZER_DECL_CLASS, SMW_SERIALIZER_DECL_OPROP,
	 * SMW_SERIALIZER_DECL_APROP.
	 * @var array of integer
	 */
	protected $decl_done;
	/**
	 * Array of additional namespaces (abbreviation => URI), flushed on
	 * closing the current namespace tag. Since we export in a streamed
	 * way, it is not always possible to embed additional namespaces into
	 * a syntactic block (e.g. an RDF/XML tag) which might have been sent to
	 * the client already. But we wait with printing the current block so that
	 * extra namespaces from this array can still be printed (note that one
	 * never knows which extra namespaces will be encountered during export).
	 * @var array of string
	 */
	protected $extra_namespaces;
	/**
	 * Array of namespaces that have been declared globally already. Contains
	 * entries of format 'namespace abbreviation' => true, assuming that the
	 * same abbreviation always refers to the same URI.
	 * @var array of boolean
	 */
	protected $global_namespaces;

	/**
	 * Constructor. Initialises all buffers and bookkeeping arrays.
	 */
	public function __construct() {
		$this->clear();
	}

	/**
	 * Clear internal states to start a new serialization.
	 */
	public function clear() {
		$this->pre_ns_buffer = '';
		$this->post_ns_buffer = '';
		$this->decl_todo = array();
		$this->decl_done = array();
		$this->global_namespaces = array();
		$this->extra_namespaces = array();
	}

	/**
	 * Start a new serialization, resetting all internal data and serializing
	 * necessary header elements.
	 */
	public function startSerialization() {
		$this->clear();
		$this->serializeHeader();
	}

	/**
	 * Complete the serialization so that calling flushContent() will return
	 * the final part of the output, leading to a complete serialization with
	 * all necessary declarations. No further serialization functions must be
	 * called after this.
	 */
	public function finishSerialization() {
		$this->serializeDeclarations();
		$this->serializeFooter();
	}

	/**
	 * Serialize the header (i.e. write it to the internal buffer). May
	 * include standard syntax to start output but also declare some common
	 * namespaces globally.
	 */
	abstract protected function serializeHeader();

	/**
	 * Serialise the footer (i.e. write it to the internal buffer).
	 */
	abstract protected function serializeFooter();

	/**
	 * Serialize any declarations that have been found to be missing while
	 * serializing other elements. Every pending entry is emitted through
	 * serializeDeclaration(), recorded as done, and the todo list is emptied.
	 */
	public function serializeDeclarations() {
		foreach ( $this->decl_todo as $name => $flag ) {
			$types = array();
			if ( $flag & SMW_SERIALIZER_DECL_CLASS ) {
				$types[] = 'owl:Class';
			}
			if ( $flag & SMW_SERIALIZER_DECL_OPROP ) {
				$types[] = 'owl:ObjectProperty';
			}
			if ( $flag & SMW_SERIALIZER_DECL_APROP ) {
				$types[] = 'owl:DatatypeProperty';
			}
			foreach ( $types as $typename ) {
				$this->serializeDeclaration( $name, $typename );
			}
			// Merge the freshly serialized flags into what was already declared.
			$curdone = array_key_exists( $name, $this->decl_done ) ? $this->decl_done[$name] : 0;
			$this->decl_done[$name] = $curdone | $flag;
		}
		$this->decl_todo = array(); // reset all
	}

	/**
	 * Serialize a single declaration for the given $uri (expanded) and type
	 * (given as a QName).
	 * @param $uri string URI of the thing to declare
	 * @param $typename string one of owl:Class, owl:ObjectProperty, and
	 * owl:DatatypeProperty
	 */
	abstract public function serializeDeclaration( $uri, $typename );

	/**
	 * Serialise the given SMWExpData object. The method must not assume that
	 * the exported data refers to wiki pages or other SMW data, and it must
	 * ensure that all required auxiliary declarations for obtaining proper OWL
	 * are included in any case (this can be done using requireDeclaration()).
	 *
	 * @param $data SMWExpData containing the data to be serialised.
	 */
	abstract public function serializeExpData( SMWExpData $data );

	/**
	 * Get the string that has been serialized so far. This function also
	 * resets the internal buffers for serialized strings and namespaces
	 * (what is flushed is gone).
	 *
	 * @return string concatenation of both buffers, or '' if both are empty
	 */
	public function flushContent() {
		// Nothing buffered: return early and keep pending namespaces for later.
		if ( ( $this->pre_ns_buffer === '' ) && ( $this->post_ns_buffer === '' ) ) {
			return '';
		}
		$this->serializeNamespaces();
		$result = $this->pre_ns_buffer . $this->post_ns_buffer;
		$this->pre_ns_buffer = '';
		$this->post_ns_buffer = '';
		return $result;
	}

	/**
	 * Include collected namespace information into the serialization.
	 */
	protected function serializeNamespaces() {
		foreach ( $this->extra_namespaces as $nsshort => $nsuri ) {
			$this->serializeNamespace( $nsshort, $nsuri );
		}
		$this->extra_namespaces = array();
	}

	/**
	 * Serialize a single namespace.
	 * Namespaces that were serialized in such a way that they remain
	 * available for all following output should be added to
	 * $global_namespaces.
	 * @param $shortname string abbreviation/prefix to declare
	 * @param $uri string URI prefix that the namespace encodes
	 */
	abstract protected function serializeNamespace( $shortname, $uri );

	/**
	 * Require an additional namespace to be declared in the serialization.
	 * The function checks whether the required namespace is available globally
	 * and adds it to the list of required namespaces otherwise.
	 *
	 * @param $nsshort string abbreviation/prefix of the namespace
	 * @param $nsuri string URI prefix that the namespace encodes
	 */
	protected function requireNamespace( $nsshort, $nsuri ) {
		if ( !array_key_exists( $nsshort, $this->global_namespaces ) ) {
			$this->extra_namespaces[$nsshort] = $nsuri;
		}
	}

	/**
	 * State that a certain declaration is needed. The method checks if the
	 * declaration is already available, and records a todo otherwise.
	 *
	 * @param $resource SMWExpResource the element that needs a declaration
	 * @param $decltype integer flag such as SMW_SERIALIZER_DECL_CLASS
	 */
	protected function requireDeclaration( SMWExpResource $resource, $decltype ) {
		// Do not declare predefined OWL language constructs:
		if ( $resource instanceof SMWExpNsResource ) {
			$nsId = $resource->getNamespaceId();
			if ( in_array( $nsId, array( 'owl', 'rdf', 'rdfs' ), true ) ) {
				return;
			}
		}
		// Do not declare blank nodes:
		if ( $resource->isBlankNode() ) {
			return;
		}

		$name = $resource->getUri();
		if ( array_key_exists( $name, $this->decl_done ) && ( $this->decl_done[$name] & $decltype ) ) {
			return;
		}
		// Add the requested flag to whatever is already pending for this name.
		$pending = array_key_exists( $name, $this->decl_todo ) ? $this->decl_todo[$name] : 0;
		$this->decl_todo[$name] = $pending | $decltype;
	}

	/**
	 * Update the declaration "todo" and "done" lists for the case that the
	 * given data has been serialized with the type information it provides.
	 *
	 * @param $expData SMWExpData specifying the type data upon which declarations are based
	 */
	protected function recordDeclarationTypes( SMWExpData $expData ) {
		foreach ( $expData->getSpecialValues( 'rdf', 'type' ) as $typeresource ) {
			if ( !( $typeresource instanceof SMWExpNsResource ) ) {
				continue;
			}
			switch ( $typeresource->getQName() ) {
				case 'owl:Class':
					$typeflag = SMW_SERIALIZER_DECL_CLASS;
					break;
				case 'owl:ObjectProperty':
					$typeflag = SMW_SERIALIZER_DECL_OPROP;
					break;
				case 'owl:DatatypeProperty':
					$typeflag = SMW_SERIALIZER_DECL_APROP;
					break;
				default:
					// Any other rdf:type value does not amount to a declaration.
					$typeflag = 0;
					break;
			}
			if ( $typeflag !== 0 ) {
				$this->declarationDone( $expData->getSubject(), $typeflag );
			}
		}
	}

	/**
	 * Update the declaration "todo" and "done" lists to reflect the fact that
	 * the given element has been declared to have the given type.
	 *
	 * @param $element SMWExpResource specifying the element to update
	 * @param $typeflag integer specifying the type (e.g. SMW_SERIALIZER_DECL_CLASS)
	 */
	protected function declarationDone( SMWExpResource $element, $typeflag ) {
		$name = $element->getUri();
		$curdone = array_key_exists( $name, $this->decl_done ) ? $this->decl_done[$name] : 0;
		$this->decl_done[$name] = $curdone | $typeflag;
		if ( array_key_exists( $name, $this->decl_todo ) ) {
			// Clear the declared bit(s); drop the entry once nothing is pending.
			$this->decl_todo[$name] = $this->decl_todo[$name] & ( ~$typeflag );
			if ( $this->decl_todo[$name] === 0 ) {
				unset( $this->decl_todo[$name] );
			}
		}
	}

	/**
	 * Check if the given property is one of the special properties of the OWL
	 * language that require their values to be classes or RDF lists of
	 * classes. In these cases, it is necessary to declare this in the exported
	 * data.
	 *
	 * @note The list of properties checked here is not complete for the OWL
	 * language but covers what is used in SMW.
	 * @note OWL 2 allows URIs to refer to both classes and individual elements
	 * in different contexts. We only need declarations for classes that are
	 * used as such, hence it is enough to check the property. Moreover, we do
	 * not use OWL Datatypes in SMW, so rdf:type, rdfs:domain, etc. always
	 * refer to classes.
	 *
	 * @param SMWExpNsResource $property
	 *
	 * @return boolean
	 */
	protected function isOWLClassTypeProperty( SMWExpNsResource $property ) {
		$locname = $property->getLocalName();
		switch ( $property->getNamespaceId() ) {
			case 'rdf':
				return $locname === 'type';
			case 'owl':
				return in_array(
					$locname,
					array(
						'intersectionOf', 'unionOf', 'equivalentClass', 'complementOf',
						'someValuesFrom', 'allValuesFrom', 'onClass'
					),
					true
				);
			case 'rdfs':
				return in_array( $locname, array( 'subClassOf', 'range', 'domain' ), true );
			default:
				return false;
		}
	}

}
334