Passed
Push — master ( ab6f49...c762ff )
by
unknown
16:59 queued 08:07
created

xapian_get_all_terms()   A

Complexity

Conditions 6
Paths 17

Size

Total Lines 26
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 16
c 0
b 0
f 0
nc 17
nop 3
dl 0
loc 26
rs 9.1111
1
<?php
2
3
/* For licensing terms, see /license.txt */
4
5
require_once 'xapian.php';
6
// TODO: think another way without including specific fields here
7
require_once api_get_path(LIBRARY_PATH) . 'specific_fields_manager.lib.php';
8
9
use Chamilo\CoreBundle\Framework\Container;
10
11
/**
 * Legacy default index path (Chamilo 1).
 *
 * In Chamilo 2 the Symfony SearchIndexPathResolver is preferred; this constant
 * is kept only as a fallback for older installs.
 *
 * SYS_UPLOAD_PATH may not be declared on every platform version, and reading an
 * undefined constant is a fatal Error on PHP 8. When it is missing, XAPIAN_DB is
 * defined as an empty string so that this file can still be loaded.
 * NOTE(review): an empty path is presumably rejected by Xapian when a database
 * is opened with it - confirm this is the desired failure mode.
 */
if (!defined('XAPIAN_DB')) {
    define(
        'XAPIAN_DB',
        defined('SYS_UPLOAD_PATH') ? api_get_path(SYS_UPLOAD_PATH) . 'plugins/xapian/searchdb/' : ''
    );
}
0 ignored issues
show
Bug introduced by
The constant SYS_UPLOAD_PATH was not found. Maybe you did not declare it correctly or list all dependencies?
Loading history...
18
19
/**
 * Provides the XapianDatabase handle used by the search helpers.
 *
 * An instance passed in by the caller is handed back untouched. Otherwise a
 * new database is opened on the index directory reported by the Symfony
 * SearchIndexPathResolver when the Chamilo 2 container class exists, or on
 * the legacy XAPIAN_DB path when the resolver cannot be used.
 *
 * @param XapianDatabase|null $db Existing database instance (optional)
 *
 * @return XapianDatabase
 *
 * @throws Exception If the database cannot be opened
 */
function xapian_get_database($db = null)
{
    // Reuse the handle supplied by the caller.
    if ($db instanceof XapianDatabase) {
        return $db;
    }

    // Start from the legacy location (Chamilo 1 behavior).
    $indexDir = XAPIAN_DB;

    $containerAvailable = class_exists(Container::class);
    if ($containerAvailable) {
        try {
            /** @var \Chamilo\CoreBundle\Search\Xapian\SearchIndexPathResolver $pathResolver */
            $pathResolver = Container::getSearchIndexPathResolver();
            $pathResolver->ensureIndexDirectoryExists();
            // Only overwritten once every resolver call above has succeeded.
            $indexDir = $pathResolver->getIndexDir();
        } catch (\Throwable $ignored) {
            // Deliberately ignored: any resolver/container failure leaves the
            // legacy path in place instead of aborting the request.
        }
    }

    return new XapianDatabase($indexDir);
}
56
57
/**
 * Queries the database.
 *
 * Runs a search combining a free-text query string with application-defined
 * terms. Based on drupal-xapian.
 *
 * When $query_string is not empty it is parsed (English stemmer, 'courseid'
 * and 'toolid' boolean prefixes) and AND-ed with the extra subqueries.
 * When it is empty, the extra subqueries alone are OR-ed together.
 *
 * Each result row contains: 'courseid', 'toolid', one 'sf-<code>' entry per
 * specific field, 'xapian_data' (unserialized document data) and 'score'.
 * Rows are keyed by their 1-based position in the returned page. The course
 * and tool ids are empty strings when the document carries no such term.
 *
 * @param string              $query_string The search string. This string will
 *                                          be parsed and stemmed automatically.
 * @param XapianDatabase|null $db           Xapian database to connect
 * @param int                 $start        An integer defining the first
 *                                          document to return
 * @param int                 $length       The number of results to return
 * @param array               $extra        An array containing arrays of
 *                                          extra terms to search for
 * @param int                 $count_type   How to compute the match count:
 *                                          0 = best estimate,
 *                                          1 = lower bound,
 *                                          2 = upper bound
 *
 * @return array|null [int $count, array $results] or null on error
 */
function xapian_query($query_string, $db = null, $start = 0, $length = 10, $extra = [], $count_type = 0)
{
    try {
        $db = xapian_get_database($db);

        // Build subqueries from $extra array. Now only used by tags search filter on search widget.
        $subqueries = [];
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }

        $enquire = new XapianEnquire($db);

        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            // TODO: choose stemmer based on platform/user language if needed
            $stemmer = new XapianStem('english');
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);

            $parsedQuery = $query_parser->parse_query($query_string);
            $final_array = array_merge($subqueries, [$parsedQuery]);
            $query = new XapianQuery(XapianQuery::OP_AND, $final_array);
        } else {
            // No free-text query: OR all subqueries (e.g. tag-only search)
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }

        $enquire->set_query($query);

        $matches = $enquire->get_mset((int) $start, (int) $length);

        $specific_fields = get_specific_field_list();

        $results = [];
        $end = $matches->end();

        // 1-based position inside the returned page; used as the result key.
        // It advances even for entries without a document, as before.
        $position = 0;

        for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
            $position++;
            $document = $i->get_document();

            if (!is_object($document)) {
                continue;
            }

            // Process one item terms (course id, tool id). The term lookup may
            // return null or an empty list, so the first entry is only read
            // when it exists; the leading prefix character is stripped.
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $results[$position]['courseid'] = isset($courseid_terms[0]['name'])
                ? (string) substr($courseid_terms[0]['name'], 1)
                : '';

            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $results[$position]['toolid'] = isset($toolid_terms[0]['name'])
                ? (string) substr($toolid_terms[0]['name'], 1)
                : '';

            // Process each specific field prefix
            foreach ($specific_fields as $specific_field) {
                $results[$position]['sf-' . $specific_field['code']] = xapian_get_doc_terms(
                    $document,
                    $specific_field['code']
                );
            }

            // Rest of data.
            // NOTE(review): unserialize() instantiates whatever the stored
            // payload describes; this is only safe while the index is written
            // exclusively by trusted code. Consider a safer format or the
            // 'allowed_classes' option once the payload shape is confirmed.
            $results[$position]['xapian_data'] = unserialize($document->get_data());
            $results[$position]['score'] = $i->get_percent();
        }

        // Compute match count according to requested type
        switch ($count_type) {
            case 1: // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;

            case 2: // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;

            case 0: // Best estimate
            default:
                $count = $matches->get_matches_estimated();
                break;
        }

        return [$count, $results];
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());

        return null;
    }
}
173
174
/**
 * Wraps a single term into a XapianQuery object.
 *
 * @param string $term The term string, passed unchanged to the XapianQuery constructor
 *
 * @return XapianQuery
 */
function xapian_get_boolean_query($term)
{
    $boolean_query = new XapianQuery($term);

    return $boolean_query;
}
185
186
/**
 * Retrieve a list of database terms.
 *
 * Each returned entry is an array with the keys 'frequency' and 'name'.
 *
 * @param int                 $count  Maximum number of terms to retrieve; 0 means
 *                                    "no limit", a negative value yields no terms.
 *                                    Numeric strings are accepted.
 * @param string              $prefix Only terms starting with this prefix are listed;
 *                                    an empty string or null lists every term
 * @param XapianDatabase|null $db     Xapian database to connect
 *
 * @return array|null List of terms, or null when a Xapian error was displayed
 */
function xapian_get_all_terms($count = 0, $prefix = '', $db = null)
{
    try {
        $db = xapian_get_database($db);

        // Compare against '' instead of using empty(), which would also
        // discard the legitimate prefix "0".
        $prefix = (string) $prefix;
        $termi = $prefix !== '' ? $db->allterms_begin($prefix) : $db->allterms_begin();

        // Loop invariants: the end marker and the normalized limit.
        $end = $db->allterms_end();
        $limit = (int) $count;

        $terms = [];
        while (!$termi->equals($end) && ($limit === 0 || count($terms) < $limit)) {
            $terms[] = [
                'frequency' => $termi->get_termfreq(),
                'name' => $termi->get_term(),
            ];
            $termi->next();
        }

        return $terms;
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());

        return null;
    }
}
223
224
/**
 * Retrieve all terms of a document filtered by prefix.
 *
 * Each returned entry is an array with the keys 'frequency' and 'name'
 * (the name still includes the prefix).
 *
 * @param XapianDocument|null $doc    Document to inspect
 * @param string              $prefix Prefix used to filter the terms; may be longer
 *                                    than one character. An empty prefix matches
 *                                    nothing.
 *
 * @return array|null Matching terms; null when $doc is not a XapianDocument
 *                    or when a Xapian error was displayed
 */
function xapian_get_doc_terms($doc = null, $prefix = '')
{
    if (!$doc instanceof XapianDocument) {
        return null;
    }

    $prefix = (string) $prefix;
    $prefix_length = strlen($prefix);
    if ($prefix_length === 0) {
        return [];
    }

    try {
        // TODO: make the filter by prefix on xapian if possible
        // ojwb marvil07: use Document::termlist_begin() and then skip_to(prefix) on the TermIterator
        // ojwb you'll need to check the end condition by hand though
        $terms = [];
        $end = $doc->termlist_end();

        for ($termi = $doc->termlist_begin(); !$termi->equals($end); $termi->next()) {
            $name = $termi->get_term();

            // strncmp() copes with empty term names and with prefixes longer
            // than one character, unlike indexing the first character.
            if (strncmp($name, $prefix, $prefix_length) !== 0) {
                continue;
            }

            // The frequency is only fetched for terms that are kept.
            $terms[] = [
                'frequency' => $termi->get_termfreq(),
                'name' => $name,
            ];
        }

        return $terms;
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());

        return null;
    }
}
262
263
/**
 * Combines Xapian queries into a single query.
 *
 * Both operands may be a single query or an array of queries. When $query2
 * is null, only the queries in $query1 are combined.
 *
 * @param XapianQuery|array      $query1 First query or array of queries
 * @param XapianQuery|array|null $query2 Second query or array of queries (optional)
 * @param string                 $op     Logical operator: 'and' joins with AND,
 *                                       anything else joins with OR
 *
 * @return XapianQuery
 */
function xapian_join_queries($query1, $query2 = null, $op = 'or')
{
    // Translate the textual operator here so callers do not need xapian.php.
    // Loose comparison on purpose: it matches how a switch/case compares.
    $operator = ($op == 'and') ? XapianQuery::OP_AND : XapianQuery::OP_OR;

    // Collect every operand into one flat list.
    $operands = is_array($query1) ? $query1 : [$query1];

    if ($query2 !== null) {
        $second = is_array($query2) ? $query2 : [$query2];
        $operands = array_merge($operands, $second);
    }

    return new XapianQuery($operator, $operands);
}
301
302
/**
 * Prints a translated, human-readable message for a Xapian error.
 *
 * Everything before the first ':' of the message is treated as the error
 * type. Known types get a dedicated message, any other type gets a generic
 * one. The result is echoed as an 'error' message box.
 *
 * @author Isaac flores paz <[email protected]>
 *
 * @param string $xapian_error_message The Xapian error message
 *
 * @return void
 */
function display_xapian_error($xapian_error_message)
{
    // Error type => untranslated message.
    $known_errors = [
        'DatabaseOpeningError' => 'Failed to open the search database',
        'DatabaseVersionError' => 'The search database uses an unsupported format',
        'DatabaseModifiedError' => 'The search database has been modified/broken',
        'DatabaseLockError' => 'Failed to lock the search database',
        'DatabaseCreateError' => 'Failed to create the search database',
        'DatabaseCorruptError' => 'The search database has suffered corruption',
        'NetworkTimeoutError' => 'Connection timed out while communicating with the remote search database',
    ];

    $parts = explode(':', $xapian_error_message);
    $error_type = $parts[0];

    $label = isset($known_errors[$error_type])
        ? $known_errors[$error_type]
        : 'Error in search engine';
    $message_error = get_lang($label);

    echo Display::return_message(get_lang('Error') . ' : ' . $message_error, 'error');
}
337