@@ -1,9 +1,9 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | /** |
| 3 | - * Copyright (c) 2012-2013 Aalto University and University of Helsinki |
|
| 4 | - * MIT License |
|
| 5 | - * see LICENSE.txt for more information |
|
| 6 | - */ |
|
| 3 | + * Copyright (c) 2012-2013 Aalto University and University of Helsinki |
|
| 4 | + * MIT License |
|
| 5 | + * see LICENSE.txt for more information |
|
| 6 | + */ |
|
| 7 | 7 | |
| 8 | 8 | /* Register text: namespace needed for jena-text queries */ |
| 9 | 9 | EasyRdf_Namespace::set('text', 'http://jena.apache.org/text#'); |
@@ -12,20 +12,20 @@ discard block |
||
| 12 | 12 | * Provides functions tailored to the JenaTextSparql extensions for the Fuseki SPARQL index. |
| 13 | 13 | */ |
| 14 | 14 | class JenaTextSparql extends GenericSparql { |
| 15 | - /** |
|
| 16 | - * How many results to ask from the jena-text index. jena-text defaults to |
|
| 17 | - * 10000, but that is too little in some cases. |
|
| 18 | - * See issue reports: |
|
| 19 | - * https://code.google.com/p/onki-light/issues/detail?id=109 (original, set to 1000000000) |
|
| 20 | - * https://github.com/NatLibFi/Skosmos/issues/41 (reduced to 100000 because of bad performance) |
|
| 21 | - */ |
|
| 22 | - private $MAX_N = 100000; |
|
| 23 | - |
|
| 24 | - /* |
|
| 15 | + /** |
|
| 16 | + * How many results to ask from the jena-text index. jena-text defaults to |
|
| 17 | + * 10000, but that is too few in some cases. |
|
| 18 | + * See issue reports: |
|
| 19 | + * https://code.google.com/p/onki-light/issues/detail?id=109 (original, set to 1000000000) |
|
| 20 | + * https://github.com/NatLibFi/Skosmos/issues/41 (reduced to 100000 because of bad performance) |
|
| 21 | + */ |
|
| 22 | + private $MAX_N = 100000; |
|
| 23 | + |
|
| 24 | + /* |
|
| 25 | 25 | * Characters that need to be quoted for the Lucene query parser. |
| 26 | 26 | * See http://lucene.apache.org/core/4_10_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters |
| 27 | 27 | */ |
| 28 | - private $LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/'; /* note: don't include * because we want wildcard expansion |
|
| 28 | + private $LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/'; /* note: don't include * because we want wildcard expansion */ |
|
| 29 | 29 | |
| 30 | 30 | /** |
| 31 | 31 | * Make a jena-text query condition that narrows the amount of search |
@@ -36,102 +36,102 @@ discard block |
||
| 36 | 36 | * @return string SPARQL text search clause |
| 37 | 37 | */ |
| 38 | 38 | |
| 39 | - private function createTextQueryCondition($term, $property = '', $lang = '') { |
|
| 40 | - // construct the lucene search term for jena-text |
|
| 41 | - |
|
| 42 | - // 1. Ensure characters with special meaning in Lucene are escaped |
|
| 43 | - $lucenemap = array(); |
|
| 44 | - foreach (str_split($this->LUCENE_ESCAPE_CHARS) as $char) { |
|
| 45 | - $lucenemap[$char] = '\\' . $char; // escape with a backslash |
|
| 46 | - } |
|
| 47 | - $term = strtr($term, $lucenemap); |
|
| 48 | - |
|
| 49 | - // 2. Ensure proper SPARQL quoting |
|
| 50 | - $term = str_replace('\\', '\\\\', $term); // escape backslashes |
|
| 51 | - $term = str_replace("'", "\\'", $term); // escape single quotes |
|
| 52 | - |
|
| 53 | - $lang_clause = empty($lang) ? '' : "'lang:$lang'"; |
|
| 54 | - |
|
| 55 | - $max_results = $this->MAX_N; |
|
| 56 | - |
|
| 57 | - return "(?s ?score ?literal) text:query ($property '$term' $lang_clause $max_results)"; |
|
| 58 | - } |
|
| 59 | - |
|
| 60 | - /** |
|
| 61 | - * Generates the jena-text-specific sparql query used for rendering the alphabetical index. |
|
| 62 | - * @param string $letter the letter (or special class) to search for |
|
| 63 | - * @param string $lang language of labels |
|
| 64 | - * @param integer $limit limits the amount of results |
|
| 65 | - * @param integer $offset offsets the result set |
|
| 66 | - * @param array $classes |
|
| 67 | - * @return string sparql query |
|
| 68 | - */ |
|
| 39 | + private function createTextQueryCondition($term, $property = '', $lang = '') { |
|
| 40 | + // construct the lucene search term for jena-text |
|
| 69 | 41 | |
| 70 | - /** |
|
| 71 | - * Query for concepts using a search term, with the jena-text index. |
|
| 72 | - * @param string $term search term |
|
| 73 | - * @param array $vocabs array of Vocabulary objects to search; empty for global search |
|
| 74 | - * @param string $lang language code of the returned labels |
|
| 75 | - * @param string $search_lang language code used for matching labels (null means any language) |
|
| 76 | - * @param int $limit maximum number of hits to retrieve; 0 for unlimited |
|
| 77 | - * @param int $offset offset of results to retrieve; 0 for beginning of list |
|
| 78 | - * @param string $arrayClass the URI for thesaurus array class, or null if not used |
|
| 79 | - * @param array $types limit search to concepts of the given type(s) |
|
| 80 | - * @param string $parent limit search to concepts which have the given concept as parent in the transitive broader hierarchy |
|
| 81 | - * @param string $group limit search to concepts which are in the given group |
|
| 82 | - * @param boolean $hidden include matches on hidden labels (default: true) |
|
| 83 | - * @param array $fields extra fields to include in the result (array of strings). (default: null = none) |
|
| 84 | - * @return string sparql query |
|
| 85 | - */ |
|
| 86 | - protected function generateConceptSearchQuery($term, $vocabs, $lang, $search_lang, $limit, $offset, $arrayClass, $types, $parent, $group, $hidden, $fields) { |
|
| 87 | - $gc = $this->graphClause; |
|
| 88 | - $limitandoffset = $this->formatLimitAndOffset($limit, $offset); |
|
| 89 | - |
|
| 90 | - $formattedtype = $this->formatTypes($types, $arrayClass); |
|
| 91 | - |
|
| 92 | - $formattedbroader = $this->formatBroader($lang, $fields); |
|
| 93 | - $extravars = $formattedbroader['extravars']; |
|
| 94 | - $extrafields = $formattedbroader['extrafields']; |
|
| 95 | - |
|
| 96 | - // extra conditions for label language, if specified |
|
| 97 | - $labelcond_label = ($lang) ? "langMatches(lang(?label), '$lang')" : "langMatches(lang(?label), lang(?literal))"; |
|
| 98 | - // if search language and UI/display language differ, must also consider case where there is no prefLabel in |
|
| 99 | - // the display language; in that case, should use the label with the same language as the matched label |
|
| 100 | - $labelcond_fallback = ($search_lang != $lang) ? |
|
| 101 | - "OPTIONAL { # in case previous OPTIONAL block gives no labels |
|
| 42 | + // 1. Ensure characters with special meaning in Lucene are escaped |
|
| 43 | + $lucenemap = array(); |
|
| 44 | + foreach (str_split($this->LUCENE_ESCAPE_CHARS) as $char) { |
|
| 45 | + $lucenemap[$char] = '\\' . $char; // escape with a backslash |
|
| 46 | + } |
|
| 47 | + $term = strtr($term, $lucenemap); |
|
| 48 | + |
|
| 49 | + // 2. Ensure proper SPARQL quoting |
|
| 50 | + $term = str_replace('\\', '\\\\', $term); // escape backslashes |
|
| 51 | + $term = str_replace("'", "\\'", $term); // escape single quotes |
|
| 52 | + |
|
| 53 | + $lang_clause = empty($lang) ? '' : "'lang:$lang'"; |
|
| 54 | + |
|
| 55 | + $max_results = $this->MAX_N; |
|
| 56 | + |
|
| 57 | + return "(?s ?score ?literal) text:query ($property '$term' $lang_clause $max_results)"; |
|
| 58 | + } |
|
| 59 | + |
|
| 60 | + /** |
|
| 61 | + * Generates the jena-text-specific SPARQL query used for rendering the alphabetical index (this comment describes generateAlphabeticalListQuery(), although it is not attached to that method's definition). |
|
| 62 | + * @param string $letter the letter (or special class) to search for |
|
| 63 | + * @param string $lang language of labels |
|
| 64 | + * @param integer $limit limits the number of results |
|
| 65 | + * @param integer $offset offsets the result set |
|
| 66 | + * @param array $classes class URIs used as ?type values; defaults to skos:Concept when empty |
|
| 67 | + * @return string sparql query |
|
| 68 | + */ |
|
| 69 | + |
|
| 70 | + /** |
|
| 71 | + * Query for concepts using a search term, with the jena-text index. |
|
| 72 | + * @param string $term search term |
|
| 73 | + * @param array $vocabs array of Vocabulary objects to search; empty for global search |
|
| 74 | + * @param string $lang language code of the returned labels |
|
| 75 | + * @param string $search_lang language code used for matching labels (null means any language) |
|
| 76 | + * @param int $limit maximum number of hits to retrieve; 0 for unlimited |
|
| 77 | + * @param int $offset offset of results to retrieve; 0 for beginning of list |
|
| 78 | + * @param string $arrayClass the URI for thesaurus array class, or null if not used |
|
| 79 | + * @param array $types limit search to concepts of the given type(s) |
|
| 80 | + * @param string $parent limit search to concepts which have the given concept as parent in the transitive broader hierarchy |
|
| 81 | + * @param string $group limit search to concepts which are in the given group |
|
| 82 | + * @param boolean $hidden include matches on hidden labels (default: true) |
|
| 83 | + * @param array $fields extra fields to include in the result (array of strings). (default: null = none) |
|
| 84 | + * @return string sparql query |
|
| 85 | + */ |
|
| 86 | + protected function generateConceptSearchQuery($term, $vocabs, $lang, $search_lang, $limit, $offset, $arrayClass, $types, $parent, $group, $hidden, $fields) { |
|
| 87 | + $gc = $this->graphClause; |
|
| 88 | + $limitandoffset = $this->formatLimitAndOffset($limit, $offset); |
|
| 89 | + |
|
| 90 | + $formattedtype = $this->formatTypes($types, $arrayClass); |
|
| 91 | + |
|
| 92 | + $formattedbroader = $this->formatBroader($lang, $fields); |
|
| 93 | + $extravars = $formattedbroader['extravars']; |
|
| 94 | + $extrafields = $formattedbroader['extrafields']; |
|
| 95 | + |
|
| 96 | + // extra conditions for label language, if specified |
|
| 97 | + $labelcond_label = ($lang) ? "langMatches(lang(?label), '$lang')" : "langMatches(lang(?label), lang(?literal))"; |
|
| 98 | + // if search language and UI/display language differ, must also consider case where there is no prefLabel in |
|
| 99 | + // the display language; in that case, should use the label with the same language as the matched label |
|
| 100 | + $labelcond_fallback = ($search_lang != $lang) ? |
|
| 101 | + "OPTIONAL { # in case previous OPTIONAL block gives no labels |
|
| 102 | 102 | ?s skos:prefLabel ?label . |
| 103 | 103 | FILTER (langMatches(lang(?label), lang(?literal))) }" : ""; |
| 104 | 104 | |
| 105 | - // extra conditions for parent and group, if specified |
|
| 106 | - $parentcond = ($parent) ? "?s skos:broader+ <$parent> ." : ""; |
|
| 107 | - $groupcond = ($group) ? "<$group> skos:member ?s ." : ""; |
|
| 108 | - $pgcond = $parentcond . $groupcond; |
|
| 105 | + // extra conditions for parent and group, if specified |
|
| 106 | + $parentcond = ($parent) ? "?s skos:broader+ <$parent> ." : ""; |
|
| 107 | + $groupcond = ($group) ? "<$group> skos:member ?s ." : ""; |
|
| 108 | + $pgcond = $parentcond . $groupcond; |
|
| 109 | 109 | |
| 110 | - $orderextra = $this->isDefaultEndpoint() ? $this->graph : ''; |
|
| 110 | + $orderextra = $this->isDefaultEndpoint() ? $this->graph : ''; |
|
| 111 | 111 | |
| 112 | - # make VALUES clauses |
|
| 113 | - $props = array('skos:prefLabel', 'skos:altLabel'); |
|
| 114 | - if ($hidden) { |
|
| 115 | - $props[] = 'skos:hiddenLabel'; |
|
| 116 | - } |
|
| 117 | - $values_prop = $this->formatValues('?prop', $props); |
|
| 112 | + # make VALUES clauses |
|
| 113 | + $props = array('skos:prefLabel', 'skos:altLabel'); |
|
| 114 | + if ($hidden) { |
|
| 115 | + $props[] = 'skos:hiddenLabel'; |
|
| 116 | + } |
|
| 117 | + $values_prop = $this->formatValues('?prop', $props); |
|
| 118 | 118 | |
| 119 | - $values_graph = $this->formatValuesGraph($vocabs); |
|
| 119 | + $values_graph = $this->formatValuesGraph($vocabs); |
|
| 120 | 120 | |
| 121 | - while (strpos($term, '**') !== false) { |
|
| 122 | - $term = str_replace('**', '*', $term); |
|
| 123 | - } |
|
| 124 | - // removes futile asterisks |
|
| 121 | + while (strpos($term, '**') !== false) { |
|
| 122 | + $term = str_replace('**', '*', $term); |
|
| 123 | + } |
|
| 124 | + // the loop above collapses repeated asterisks into a single one |
|
| 125 | 125 | |
| 126 | - # make text query clauses |
|
| 127 | - $textcond = $this->createTextQueryCondition($term, '?prop', $search_lang); |
|
| 126 | + # make text query clauses |
|
| 127 | + $textcond = $this->createTextQueryCondition($term, '?prop', $search_lang); |
|
| 128 | 128 | |
| 129 | - if ($this->isDefaultEndpoint()) { |
|
| 130 | - # if doing a global search, we should target the union graph instead of a specific graph |
|
| 131 | - $textcond = "GRAPH <urn:x-arq:UnionGraph> { $textcond }"; |
|
| 132 | - } |
|
| 129 | + if ($this->isDefaultEndpoint()) { |
|
| 130 | + # if doing a global search, we should target the union graph instead of a specific graph |
|
| 131 | + $textcond = "GRAPH <urn:x-arq:UnionGraph> { $textcond }"; |
|
| 132 | + } |
|
| 133 | 133 | |
| 134 | - $query = <<<EOQ |
|
| 134 | + $query = <<<EOQ |
|
| 135 | 135 | SELECT DISTINCT ?s ?label ?plabel ?alabel ?hlabel ?graph (GROUP_CONCAT(DISTINCT ?type) as ?types) |
| 136 | 136 | $extravars |
| 137 | 137 | WHERE { |
@@ -155,25 +155,25 @@ discard block |
||
| 155 | 155 | ORDER BY lcase(str(?literal)) lang(?literal) $orderextra $limitandoffset |
| 156 | 156 | $values_graph |
| 157 | 157 | EOQ; |
| 158 | - return $query; |
|
| 159 | - } |
|
| 158 | + return $query; |
|
| 159 | + } |
|
| 160 | 160 | |
| 161 | - public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null) { |
|
| 162 | - if ($letter == '*' || $letter == '0-9' || $letter == '!*') { |
|
| 163 | - // text index cannot support special character queries, use the generic implementation for these |
|
| 164 | - return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes); |
|
| 165 | - } |
|
| 161 | + public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null) { |
|
| 162 | + if ($letter == '*' || $letter == '0-9' || $letter == '!*') { |
|
| 163 | + // text index cannot support special character queries, use the generic implementation for these |
|
| 164 | + return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes); |
|
| 165 | + } |
|
| 166 | 166 | |
| 167 | - $gc = $this->graphClause; |
|
| 168 | - $classes = ($classes) ? $classes : array('http://www.w3.org/2004/02/skos/core#Concept'); |
|
| 169 | - $values = $this->formatValues('?type', $classes, 'uri'); |
|
| 170 | - $limitandoffset = $this->formatLimitAndOffset($limit, $offset); |
|
| 167 | + $gc = $this->graphClause; |
|
| 168 | + $classes = ($classes) ? $classes : array('http://www.w3.org/2004/02/skos/core#Concept'); |
|
| 169 | + $values = $this->formatValues('?type', $classes, 'uri'); |
|
| 170 | + $limitandoffset = $this->formatLimitAndOffset($limit, $offset); |
|
| 171 | 171 | |
| 172 | - # make text query clause |
|
| 173 | - $textcond_pref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $lang); |
|
| 174 | - $textcond_alt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $lang); |
|
| 172 | + # make text query clause |
|
| 173 | + $textcond_pref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $lang); |
|
| 174 | + $textcond_alt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $lang); |
|
| 175 | 175 | |
| 176 | - $query = <<<EOQ |
|
| 176 | + $query = <<<EOQ |
|
| 177 | 177 | SELECT DISTINCT ?s ?label ?alabel |
| 178 | 178 | WHERE { |
| 179 | 179 | $gc { |
@@ -196,7 +196,7 @@ discard block |
||
| 196 | 196 | } |
| 197 | 197 | ORDER BY LCASE(?literal) $limitandoffset |
| 198 | 198 | EOQ; |
| 199 | - return $query; |
|
| 200 | - } |
|
| 199 | + return $query; |
|
| 200 | + } |
|
| 201 | 201 | |
| 202 | 202 | } |