|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/*
 * Register the namespace prefixes that the SPARQL generated in this file relies on.
 */
// text: prefix, needed for jena-text queries (text:query)
\EasyRdf\RdfNamespace::set('text', 'http://jena.apache.org/text#'); // @codeCoverageIgnore
// arq: prefix, needed for ARQ functions (arq:collation)
\EasyRdf\RdfNamespace::set('arq', 'http://jena.apache.org/ARQ/function#'); // @codeCoverageIgnore
|
6
|
|
|
|
|
7
|
|
|
/**
 * Provides functions tailored to the JenaTextSparql extensions for the Fuseki SPARQL index.
 *
 * The methods in this class build SPARQL fragments and queries that use the
 * jena-text text:query property function and, when collation is enabled in
 * the configuration, the arq:collation function.
 */
class JenaTextSparql extends GenericSparql
{
    /**
     * How many results to ask from the jena-text index. jena-text defaults to
     * 10000, but that is too little in some cases.
     * See issue reports:
     * https://code.google.com/p/onki-light/issues/detail?id=109 (original, set to 1000000000)
     * https://github.com/NatLibFi/Skosmos/issues/41 (reduced to 100000 because of bad performance)
     */
    const MAX_N = 100000;

    /**
     * Characters that need to be quoted for the Lucene query parser.
     * See http://lucene.apache.org/core/4_10_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
     *
     * Note: '*' is deliberately not included because we want wildcard expansion.
     */
    const LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/';

    /**
     * Make a jena-text query condition that narrows the amount of search
     * results in term searches
     *
     * @param string $term search term
     * @param string $property property to search (e.g. 'skos:prefLabel'), or '' for default
     * @param string $searchLang language code; any non-empty value adds the ?langparam argument
     * @return string SPARQL text search clause
     */
    private function createTextQueryCondition($term, $property = '', $searchLang = '')
    {
        // construct the lucene search term for jena-text

        // 1. Ensure characters with special meaning in Lucene are escaped
        $lucenemap = array();
        foreach (str_split(self::LUCENE_ESCAPE_CHARS) as $char) {
            $lucenemap[$char] = '\\' . $char; // escape with a backslash
        }
        $term = strtr($term, $lucenemap);

        // 2. Ensure proper SPARQL quoting (the term ends up inside '...')
        $term = str_replace('\\', '\\\\', $term); // escape backslashes
        $term = str_replace("'", "\\'", $term); // escape single quotes

        $maxResults = self::MAX_N;

        // NOTE(review): the language value itself is not embedded here; the
        // clause only refers to the ?langparam variable, which presumably has
        // to be bound by the surrounding query - confirm against the callers.
        $langClause = '';
        if ($searchLang) {
            $langClause = "?langparam";
        }

        return "(?s ?score ?match) text:query ($property '$term' $maxResults $langClause) .";
    }

    /**
     * Generate jena-text search condition for matching labels in SPARQL
     * @param string $term search term
     * @param string $langClause language clause used for matching labels (null means any language)
     * @return string sparql query snippet
     */
    protected function generateConceptSearchQueryCondition($term, $langClause)
    {
        # make text query clauses
        $textcond = $this->createTextQueryCondition($term, '?prop', $langClause);

        if ($this->isDefaultEndpoint()) {
            # if doing a global search, we should target the union graph instead of a specific graph
            $textcond = "GRAPH <urn:x-arq:UnionGraph> { $textcond }";
        }

        return $textcond;
    }

    /**
     * This function generates jenatext language clauses from the search language tag
     * @param string $lang
     * @return string formatted language clause
     */
    protected function generateLangClause($lang)
    {
        return "'lang:$lang*'";
    }

    /**
     * Generates sparql query clauses used for ordering by an expression. Uses a special collation function
     * if configuration for it is enabled.
     * @param string $expression the expression used for ordering the results
     * @param string $lang language
     * @return string sparql order by clause
     */
    private function formatOrderBy($expression, $lang)
    {
        if (!$this->model->getConfig()->getCollationEnabled()) {
            return $expression;
        }

        // argument order in the output is (language, expression)
        return sprintf('arq:collation(\'%2$s\', %1$s)', $expression, $lang);
    }

    /**
     * Generates the jena-text-specific sparql query used for rendering the alphabetical index.
     * @param string $letter the letter (or special class) to search for
     * @param string $lang language of labels
     * @param integer $limit limits the amount of results
     * @param integer $offset offsets the result set
     * @param array|null $classes
     * @param boolean $showDeprecated whether to include deprecated concepts in the result (default: false)
     * @param \EasyRdf\Resource|null $qualifier alphabetical list qualifier resource or null (default: null)
     * @return string sparql query
     */
    public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null, $showDeprecated = false, $qualifier = null)
    {
        if ($letter == '*' || $letter == '0-9' || $letter == '!*') {
            // text index cannot support special character queries, use the generic implementation for these
            return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes, $showDeprecated, $qualifier);
        }

        $gc = $this->graphClause;
        $classes = ($classes) ? $classes : array('http://www.w3.org/2004/02/skos/core#Concept');
        $values = $this->formatValues('?type', $classes, 'uri');
        $limitandoffset = $this->formatLimitAndOffset($limit, $offset);

        # make text query clause
        $lcletter = mb_strtolower($letter, 'UTF-8'); // convert to lower case, UTF-8 safe

        // The lower-cased letter is embedded in a single-quoted SPARQL string
        // literal below, so it needs the same quoting as the search term:
        // without it a letter such as ' or \ yields a malformed query.
        $letterLiteral = str_replace('\\', '\\\\', $lcletter); // escape backslashes
        $letterLiteral = str_replace("'", "\\'", $letterLiteral); // escape single quotes

        $textcondPref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $lang);
        $textcondAlt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $lang);
        $orderbyclause = $this->formatOrderBy("LCASE(?match)", $lang) . " STR(?s) LCASE(STR(?qualifier))";

        $qualifierClause = $qualifier ? "OPTIONAL { ?s <" . $qualifier->getURI() . "> ?qualifier }" : "";

        $filterDeprecated = "";
        if (!$showDeprecated) {
            $filterDeprecated = "FILTER NOT EXISTS { ?s owl:deprecated true }";
        }

        // The first UNION branch matches preferred labels; the second matches
        // alternative labels and additionally fetches a preferred label in
        // the requested language.
        $query = <<<EOQ
SELECT DISTINCT ?s ?label ?alabel ?qualifier
WHERE {
  $gc {
    {
      $textcondPref
      FILTER(STRSTARTS(LCASE(STR(?match)), '$letterLiteral'))
      FILTER EXISTS { ?s skos:prefLabel ?match }
      BIND(?match as ?label)
    }
    UNION
    {
      $textcondAlt
      FILTER(STRSTARTS(LCASE(STR(?match)), '$letterLiteral'))
      FILTER EXISTS { ?s skos:altLabel ?match }
      BIND(?match as ?alabel)
      {
        ?s skos:prefLabel ?label .
        FILTER (langMatches(LANG(?label), '$lang'))
      }
    }
    ?s a ?type .
    $qualifierClause
    $filterDeprecated
  } $values
}
ORDER BY $orderbyclause $limitandoffset
EOQ;
        return $query;
    }

}
|
175
|
|
|
|