NatLibFi /
Skosmos
| 1 | <?php |
||
| 2 | |||
| 3 | /* Register the text: and arq: namespace prefixes: text: is needed for jena-text queries (text:query), arq: for ARQ functions such as arq:collation */ |
||
| 4 | EasyRdf\RdfNamespace::set('text', 'http://jena.apache.org/text#'); // @codeCoverageIgnore |
||
| 5 | EasyRdf\RdfNamespace::set('arq', 'http://jena.apache.org/ARQ/function#'); // @codeCoverageIgnore |
||
| 6 | |||
/**
 * Provides functions tailored to the JenaTextSparql extensions for the Fuseki SPARQL index.
 *
 * Search conditions are expressed with the jena-text `text:query` property
 * function instead of plain SPARQL string matching; queries that the text
 * index cannot serve are delegated to the generic parent implementation.
 */
class JenaTextSparql extends GenericSparql
{
    /**
     * How many results to ask from the jena-text index. jena-text defaults to
     * 10000, but that is too few in some cases.
     * See issue reports:
     * https://code.google.com/p/onki-light/issues/detail?id=109 (original, set to 1000000000)
     * https://github.com/NatLibFi/Skosmos/issues/41 (reduced to 100000 because of bad performance)
     */
    public const MAX_N = 100000;

    /**
     * Characters that need to be quoted for the Lucene query parser.
     * See http://lucene.apache.org/core/4_10_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
     *
     * Note: '*' is deliberately not included because we want wildcard expansion.
     */
    public const LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/';

    /**
     * Escape a string so that it can be embedded between single quotes as a
     * SPARQL string literal. Backslashes and single quotes are escaped, and
     * line feeds / carriage returns (which may not appear raw inside a
     * single-quoted literal) are replaced by their escape sequences.
     *
     * @param string $value raw string
     * @return string escaped string, without the surrounding quotes
     */
    private static function escapeForSparqlLiteral($value)
    {
        // strtr() with an array replaces in a single pass, so the backslashes
        // introduced here are never escaped a second time
        return strtr($value, [
            '\\' => '\\\\',
            "'" => "\\'",
            "\n" => '\\n',
            "\r" => '\\r',
        ]);
    }

    /**
     * Make a jena-text query condition that narrows the amount of search
     * results in term searches
     *
     * @param string $term search term
     * @param string $property property to search (e.g. 'skos:prefLabel'), or '' for default
     * @param string $langClause jena-text clause to limit search by language code
     * @return string SPARQL text search clause
     */
    private function createTextQueryCondition($term, $property = '', $langClause = '')
    {
        // construct the lucene search term for jena-text

        // 1. Ensure characters with special meaning in Lucene are escaped
        $luceneMap = [];
        foreach (str_split(self::LUCENE_ESCAPE_CHARS) as $char) {
            $luceneMap[$char] = '\\' . $char; // escape with a backslash
        }
        $term = strtr($term, $luceneMap);

        // 2. Ensure proper SPARQL quoting; this must happen after the Lucene
        //    step so that the backslashes added there get doubled as well
        $term = self::escapeForSparqlLiteral($term);

        $maxResults = self::MAX_N;

        return "(?s ?score ?match) text:query ($property '$term' $maxResults $langClause) .";
    }

    /**
     * Generate jena-text search condition for matching labels in SPARQL
     * @param string $term search term
     * @param string|null $searchLang language code used for matching labels (null means any language)
     * @return string sparql query snippet
     */
    protected function generateConceptSearchQueryCondition($term, $searchLang)
    {
        # make text query clauses; the language is passed as the ?langParam variable
        $langClause = $searchLang ? '?langParam' : '';
        $textcond = $this->createTextQueryCondition($term, '?prop', $langClause);

        if ($this->isDefaultEndpoint()) {
            # if doing a global search, we should target the union graph instead of a specific graph
            $textcond = "GRAPH <urn:x-arq:UnionGraph> { $textcond }";
        }

        return $textcond;
    }

    /**
     * This function generates jenatext language clauses from the search language tag
     * @param string $lang
     * @return string formatted language clause
     */
    protected function generateLangClause($lang)
    {
        return "'lang:$lang*'";
    }

    /**
     * Generates sparql query clauses used for ordering by an expression. Uses a special collation function
     * if configuration for it is enabled.
     * @param string $expression the expression used for ordering the results
     * @param string $lang language
     * @return string sparql order by clause
     */
    private function formatOrderBy($expression, $lang)
    {
        if (!$this->model->getConfig()->getCollationEnabled()) {
            return $expression;
        }

        return sprintf("arq:collation('%s', %s)", $lang, $expression);
    }

    /**
     * Generates the jena-text-specific sparql query used for rendering the alphabetical index.
     * @param string $letter the letter (or special class) to search for
     * @param string $lang language of labels
     * @param integer $limit limits the amount of results
     * @param integer $offset offsets the result set
     * @param array|null $classes
     * @param boolean $showDeprecated whether to include deprecated concepts in the result (default: false)
     * @param \EasyRdf\Resource|null $qualifier alphabetical list qualifier resource or null (default: null)
     * @return string sparql query
     */
    public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null, $showDeprecated = false, $qualifier = null)
    {
        if (in_array($letter, ['*', '0-9', '!*'], true)) {
            // text index cannot support special character queries, use the generic implementation for these
            return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes, $showDeprecated, $qualifier);
        }

        $gc = $this->graphClause;
        $classes = $classes ?: ['http://www.w3.org/2004/02/skos/core#Concept'];
        $values = $this->formatValues('?type', $classes, 'uri');
        $limitandoffset = $this->formatLimitAndOffset($limit, $offset);

        # make text query clause
        // lower-cased letter (UTF-8 safe), escaped because it is embedded in
        // single-quoted SPARQL literals in the STRSTARTS filters below
        $lcletter = self::escapeForSparqlLiteral(mb_strtolower($letter, 'UTF-8'));
        $langClause = $this->generateLangClause($lang);
        $textcondPref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $langClause);
        $textcondAlt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $langClause);
        $orderbyclause = $this->formatOrderBy("LCASE(?match)", $lang) . " STR(?s) LCASE(STR(?qualifier))";

        $qualifierClause = $qualifier ? "OPTIONAL { ?s <" . $qualifier->getURI() . "> ?qualifier }" : "";

        $filterDeprecated = $showDeprecated ? "" : "FILTER NOT EXISTS { ?s owl:deprecated true }";

        // The text index only narrows the candidates; the STRSTARTS filters
        // then keep just the labels that really begin with the letter.
        $query = <<<EOQ
SELECT DISTINCT ?s ?label ?alabel ?qualifier
WHERE {
  $gc {
    {
      $textcondPref
      FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
      FILTER EXISTS { ?s skos:prefLabel ?match }
      BIND(?match as ?label)
    }
    UNION
    {
      $textcondAlt
      FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
      FILTER EXISTS { ?s skos:altLabel ?match }
      BIND(?match as ?alabel)
      {
        ?s skos:prefLabel ?label .
        FILTER (langMatches(LANG(?label), '$lang'))
      }
    }
    ?s a ?type .
    $qualifierClause
    $filterDeprecated
  } $values
}
ORDER BY $orderbyclause $limitandoffset
EOQ;
        return $query;
    }
}
||
| 174 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.