JenaTextSparql::generateLangClause()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
/* Register text: namespace needed for jena-text queries */
4
EasyRdf\RdfNamespace::set('text', 'http://jena.apache.org/text#'); // @codeCoverageIgnore
5
EasyRdf\RdfNamespace::set('arq', 'http://jena.apache.org/ARQ/function#'); // @codeCoverageIgnore
6
7
/**
8
 * Provides functions tailored to the JenaTextSparql extensions for the Fuseki SPARQL index.
9
 */
10
class JenaTextSparql extends GenericSparql
11
{
12
    /**
13
     * How many results to ask from the jena-text index. jena-text defaults to
14
     * 10000, but that is too little in some cases.
15
     * See issue reports:
16
     * https://code.google.com/p/onki-light/issues/detail?id=109 (original, set to 1000000000)
17
     * https://github.com/NatLibFi/Skosmos/issues/41 (reduced to 100000 because of bad performance)
18
     */
19
    public const MAX_N = 100000;
20
21
    /*
22
     * Characters that need to be quoted for the Lucene query parser.
23
     * See http://lucene.apache.org/core/4_10_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
24
     */
25
    public const LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/'; /* note: don't include * because we want wildcard expansion
26
27
    /**
28
     * Make a jena-text query condition that narrows the amount of search
29
     * results in term searches
30
     *
31
     * @param string $term search term
32
     * @param string $property property to search (e.g. 'skos:prefLabel'), or '' for default
33
     * @param string $langClause jena-text clause to limit search by language code
34
     * @return string SPARQL text search clause
35
     */
36
37
    private function createTextQueryCondition($term, $property = '', $langClause = '')
38
    {
39
        // construct the lucene search term for jena-text
40
41
        // 1. Ensure characters with special meaning in Lucene are escaped
42
        $lucenemap = array();
43
        foreach (str_split(self::LUCENE_ESCAPE_CHARS) as $char) {
44
            $lucenemap[$char] = '\\' . $char; // escape with a backslash
45
        }
46
        $term = strtr($term, $lucenemap);
47
48
        // 2. Ensure proper SPARQL quoting
49
        $term = str_replace('\\', '\\\\', $term); // escape backslashes
50
        $term = str_replace("'", "\\'", $term); // escape single quotes
51
52
        $maxResults = self::MAX_N;
53
54
        return "(?s ?score ?match) text:query ($property '$term' $maxResults $langClause) .";
55
    }
56
57
    /**
58
     * Generate jena-text search condition for matching labels in SPARQL
59
     * @param string $term search term
60
     * @param string $searchLang language code used for matching labels (null means any language)
61
     * @return string sparql query snippet
62
     */
63
    protected function generateConceptSearchQueryCondition($term, $searchLang)
64
    {
65
        # make text query clauses
66
        $langClause = $searchLang ? '?langParam' : '';
67
        $textcond = $this->createTextQueryCondition($term, '?prop', $langClause);
68
69
        if ($this->isDefaultEndpoint()) {
70
            # if doing a global search, we should target the union graph instead of a specific graph
71
            $textcond = "GRAPH <urn:x-arq:UnionGraph> { $textcond }";
72
        }
73
74
        return $textcond;
75
    }
76
77
    /**
78
     *  This function generates jenatext language clauses from the search language tag
79
     * @param string $lang
80
     * @return string formatted language clause
81
     */
82
    protected function generateLangClause($lang)
83
    {
84
        return "'lang:$lang*'";
85
    }
86
87
88
    /**
89
     * Generates sparql query clauses used for ordering by an expression. Uses a special collation function
90
     * if configuration for it is enabled.
91
     * @param string $expression the expression used for ordering the results
92
     * @param string $lang language
93
     * @return string sparql order by clause
94
     */
95
    private function formatOrderBy($expression, $lang)
96
    {
97
        if (!$this->model->getConfig()->getCollationEnabled()) {
98
            return $expression;
99
        }
100
        $orderby = sprintf('arq:collation(\'%2$s\', %1$s)', $expression, $lang);
101
        return $orderby;
102
    }
103
104
    /**
105
     * Generates the jena-text-specific sparql query used for rendering the alphabetical index.
106
     * @param string $letter the letter (or special class) to search for
107
     * @param string $lang language of labels
108
     * @param integer $limit limits the amount of results
109
     * @param integer $offset offsets the result set
110
     * @param array|null $classes
111
     * @param boolean $showDeprecated whether to include deprecated concepts in the result (default: false)
112
     * @param \EasyRdf\Resource|null $qualifier alphabetical list qualifier resource or null (default: null)
113
     * @return string sparql query
114
     */
115
116
    public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null, $showDeprecated = false, $qualifier = null)
117
    {
118
        if ($letter == '*' || $letter == '0-9' || $letter == '!*') {
119
            // text index cannot support special character queries, use the generic implementation for these
120
            return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes, $showDeprecated, $qualifier);
121
        }
122
123
        $gc = $this->graphClause;
124
        $classes = ($classes) ? $classes : array('http://www.w3.org/2004/02/skos/core#Concept');
125
        $values = $this->formatValues('?type', $classes, 'uri');
126
        $limitandoffset = $this->formatLimitAndOffset($limit, $offset);
127
128
        # make text query clause
129
        $lcletter = mb_strtolower($letter, 'UTF-8'); // convert to lower case, UTF-8 safe
130
        $langClause = $this->generateLangClause($lang);
131
        $textcondPref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $langClause);
132
        $textcondAlt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $langClause);
133
        $orderbyclause = $this->formatOrderBy("LCASE(?match)", $lang) . " STR(?s) LCASE(STR(?qualifier))";
134
135
        $qualifierClause = $qualifier ? "OPTIONAL { ?s <" . $qualifier->getURI() . "> ?qualifier }" : "";
136
137
        $filterDeprecated = "";
138
        if (!$showDeprecated) {
139
            $filterDeprecated = "FILTER NOT EXISTS { ?s owl:deprecated true }";
140
        }
141
142
        $query = <<<EOQ
143
SELECT DISTINCT ?s ?label ?alabel ?qualifier
144
WHERE {
145
  $gc {
146
    {
147
      $textcondPref
148
      FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
149
      FILTER EXISTS { ?s skos:prefLabel ?match }
150
      BIND(?match as ?label)
151
    }
152
    UNION
153
    {
154
      $textcondAlt
155
      FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
156
      FILTER EXISTS { ?s skos:altLabel ?match }
157
      BIND(?match as ?alabel)
158
      {
159
        ?s skos:prefLabel ?label .
160
        FILTER (langMatches(LANG(?label), '$lang'))
161
      }
162
    }
163
    ?s a ?type .
164
    $qualifierClause
165
    $filterDeprecated
166
  } $values
167
}
168
ORDER BY $orderbyclause $limitandoffset
169
EOQ;
170
        return $query;
171
    }
172
173
}
174