1
|
|
|
<?php |
|
|
|
|
2
|
|
|
/** |
3
|
|
|
* Copyright (c) 2012-2013 Aalto University and University of Helsinki |
4
|
|
|
* MIT License |
5
|
|
|
* see LICENSE.txt for more information |
6
|
|
|
*/ |
7
|
|
|
|
8
|
|
|
/* Register the "text" prefix (http://jena.apache.org/text#) with EasyRdf; it is needed for the text:query clauses used in jena-text queries */ |
9
|
|
|
EasyRdf_Namespace::set('text', 'http://jena.apache.org/text#'); |
10
|
|
|
|
11
|
|
|
/** |
12
|
|
|
* Provides functions tailored to the JenaTextSparql extensions for the Fuseki SPARQL index. |
13
|
|
|
*/ |
14
|
|
|
class JenaTextSparql extends GenericSparql
{
    /**
     * How many results to ask from the jena-text index. jena-text defaults to
     * 10000, but that is too little in some cases.
     * See issue reports:
     * https://code.google.com/p/onki-light/issues/detail?id=109 (original, set to 1000000000)
     * https://github.com/NatLibFi/Skosmos/issues/41 (reduced to 100000 because of bad performance)
     */
    private $MAX_N = 100000;

    /**
     * Characters that need to be quoted for the Lucene query parser.
     * See http://lucene.apache.org/core/4_10_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
     *
     * Note: '*' is deliberately not included because we want wildcard expansion.
     */
    private $LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/';

    /**
     * Escape a value so that it can be embedded inside a single-quoted
     * SPARQL string literal ('...') without terminating or corrupting it.
     *
     * @param string $value raw text (null is treated as an empty string)
     * @return string text with backslashes, single quotes and line breaks escaped
     */
    private function escapeSparqlStringLiteral($value)
    {
        // strtr() with a map replaces in a single pass, so the backslashes
        // introduced for quotes/line breaks are never escaped a second time
        return strtr((string) $value, array(
            '\\' => '\\\\',
            "'" => "\\'",
            "\n" => '\\n',
            "\r" => '\\r',
        ));
    }

    /**
     * Make a jena-text query condition that narrows the amount of search
     * results in term searches
     *
     * @param string $term search term; may contain '*' wildcards, all other
     *                     Lucene special characters are escaped
     * @param string $property property to search (e.g. 'skos:prefLabel'), or '' for default
     * @param string $lang language code to restrict the search to, or '' (or null) for any language
     * @return string SPARQL text search clause
     */
    private function createTextQueryCondition($term, $property = '', $lang = '')
    {
        // construct the lucene search term for jena-text

        // 1. Ensure characters with special meaning in Lucene are escaped
        $lucenemap = array();
        foreach (str_split($this->LUCENE_ESCAPE_CHARS) as $char) {
            $lucenemap[$char] = '\\' . $char; // escape with a backslash
        }
        $term = strtr($term, $lucenemap);

        // 2. Ensure proper SPARQL quoting (this also doubles the backslashes
        //    added in step 1 so that they survive SPARQL string parsing)
        $term = $this->escapeSparqlStringLiteral($term);

        // the language code ends up inside a SPARQL string literal as well,
        // so it must not be able to break out of the quotes
        $lang_clause = empty($lang) ? '' : "'lang:" . $this->escapeSparqlStringLiteral($lang) . "'";

        $max_results = $this->MAX_N;

        return "(?s ?score ?match) text:query ($property '$term' $lang_clause $max_results) .";
    }

    /**
     * Generate jena-text search condition for matching labels in SPARQL
     *
     * @param string $term search term
     * @param string $search_lang language code used for matching labels (null means any language)
     * @return string sparql query snippet
     */
    protected function generateConceptSearchQueryCondition($term, $search_lang)
    {
        # make text query clauses
        $textcond = $this->createTextQueryCondition($term, '?prop', $search_lang);

        if ($this->isDefaultEndpoint()) {
            # if doing a global search, we should target the union graph instead of a specific graph
            $textcond = "GRAPH <urn:x-arq:UnionGraph> { $textcond }";
        }

        return $textcond;
    }

    /**
     * Generates the jena-text-specific sparql query used for rendering the alphabetical index.
     *
     * @param string $letter the letter (or special class) to search for
     * @param string $lang language of labels
     * @param integer $limit limits the amount of results
     * @param integer $offset offsets the result set
     * @param array $classes URIs of the classes to include; defaults to skos:Concept when empty
     * @return string sparql query
     */
    public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null)
    {
        if (in_array($letter, array('*', '0-9', '!*'), true)) {
            // text index cannot support special character queries, use the generic implementation for these
            return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes);
        }

        $gc = $this->graphClause;
        $classes = ($classes) ? $classes : array('http://www.w3.org/2004/02/skos/core#Concept');
        $values = $this->formatValues('?type', $classes, 'uri');
        $limitandoffset = $this->formatLimitAndOffset($limit, $offset);

        # make text query clause
        $textcond_pref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $lang);
        $textcond_alt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $lang);

        // both values are embedded in single-quoted SPARQL literals below, so
        // escape them to keep a quote or backslash from breaking the query
        $lcletter = $this->escapeSparqlStringLiteral(mb_strtolower($letter, 'UTF-8')); // lower case, UTF-8 safe
        $filterlang = $this->escapeSparqlStringLiteral($lang);

        // the text index match is only a candidate set: the STRSTARTS filters
        // keep just the labels that really begin with the requested letter
        $query = <<<EOQ
SELECT DISTINCT ?s ?label ?alabel
WHERE {
  $gc {
    {
      $textcond_pref
      FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
      BIND(?match as ?label)
    }
    UNION
    {
      $textcond_alt
      FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
      BIND(?match as ?alabel)
      {
        ?s skos:prefLabel ?label .
        FILTER (langMatches(LANG(?label), '$filterlang'))
      }
    }
    ?s a ?type .
    FILTER NOT EXISTS { ?s owl:deprecated true }
  } $values
}
ORDER BY LCASE(?match) $limitandoffset
EOQ;
        return $query;
    }
}
137
|
|
|
|
The PSR-1 Basic Coding Standard recommends that a file should either introduce new symbols (classes, functions, constants, or similar) or have side effects, but not both. Side effects are anything that executes logic, such as printing output, changing ini settings, or writing to a file.
The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.
To learn more about PSR-1, please see the PHP-FIG site.