Passed
Pull Request — master (#7144)
by
unknown
16:21 queued 06:48
created

XapianIndexer::connectDb()   B

Complexity

Conditions 8
Paths 109

Size

Total Lines 44
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 23
c 0
b 0
f 0
nc 109
nop 3
dl 0
loc 44
rs 8.3694
1
<?php
2
/* For licensing terms, see /license.txt */
3
4
// @todo add setting to add xapian.php
5
// require_once 'xapian.php';
6
7
use Chamilo\CoreBundle\Framework\Container;
8
use Chamilo\CoreBundle\Search\Xapian\SearchIndexPathResolver;
9
10
/**
 * Abstract helper class wrapping a writable Xapian database.
 *
 * It owns the database handle, the term generator and the stemmer, and offers
 * small helpers to add, read, update and remove indexed documents.
 */
abstract class XapianIndexer
{
    /** @var XapianTermGenerator|null Set by connectDb() */
    public $indexer;

    /** @var XapianStem|null Set by connectDb() */
    public $stemmer;

    /** @var XapianWritableDatabase|null Null until connectDb() succeeds */
    protected $db;

    /** @var IndexableChunk[] Chunks queued through addChunk() */
    protected $chunks = [];

    /**
     * Class constructor: starts without any open database or stemmer.
     */
    public function __construct()
    {
        $this->db = null;
        $this->stemmer = null;
    }

    /**
     * Class destructor: drops the references to the database and the stemmer.
     */
    public function __destruct()
    {
        unset($this->db);
        unset($this->stemmer);
    }

    /**
     * Generates a list of languages Xapian manages.
     *
     * This method enables the definition of more matches between
     * Chamilo languages and Xapian languages (through hardcoding)
     *
     * @return array Array of languages codes -> Xapian languages
     */
    final public function xapian_languages(): array
    {
        // See http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html for available languages.
        // The match with Chamilo's language table is made on the english_name field, not the ISO code.
        return [
            'none' => 'none', // do not stem terms
            'ar' => 'arabic',
            'ca' => 'catalan',
            'da' => 'danish',
            'de' => 'german',
            'en' => 'english',
            'es' => 'spanish',
            'eu' => 'basque',
            'fi' => 'finnish',
            'fr' => 'french',
            'ga' => 'irish',
            'hu' => 'hungarian',
            'hy' => 'armenian',
            'id' => 'indonesian',
            'it' => 'italian',
            'lt' => 'lithuanian',
            'ne' => 'nepali',
            'nl' => 'dutch',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ro' => 'romanian',
            'ru' => 'russian',
            'sv' => 'swedish',
            'ta' => 'tamil',
            'tr' => 'turkish',
        ];
    }

    /**
     * Connect to the database, and create it if it does not exist.
     *
     * When no path is given, the Symfony SearchIndexPathResolver is tried
     * first and the legacy upload path is used as a fallback.
     *
     * The object state (db, indexer, stemmer) is only updated when every step
     * succeeded, so a failed attempt leaves the object disconnected and a
     * later call can retry.
     *
     * @param string|null $path   Index directory, or null to resolve the default one
     * @param int|null    $dbMode Xapian open mode, defaults to Xapian::DB_CREATE_OR_OPEN
     * @param string      $lang   Xapian language name (a value of xapian_languages());
     *                            anything else falls back to 'english'
     *
     * @return XapianWritableDatabase|int The database handle, or 1 when the
     *                                    connection failed (an error message is echoed)
     */
    public function connectDb($path = null, $dbMode = null, $lang = 'english')
    {
        if ($this->db !== null) {
            return $this->db;
        }

        if ($dbMode === null) {
            $dbMode = Xapian::DB_CREATE_OR_OPEN;
        }

        if ($path === null) {
            $path = $this->resolveDefaultIndexPath();
        }

        try {
            if ($path === null) {
                throw new \RuntimeException('Unable to determine the Xapian index directory.');
            }

            if (!in_array($lang, $this->xapian_languages(), true)) {
                $lang = 'english';
            }

            // Build everything in locals first: if any constructor throws,
            // the object must not be left with a db but no usable indexer.
            $db = new XapianWritableDatabase($path, $dbMode);
            $indexer = new XapianTermGenerator();
            $stemmer = new XapianStem($lang);
            $indexer->set_stemmer($stemmer);

            $this->db = $db;
            $this->indexer = $indexer;
            $this->stemmer = $stemmer;

            return $this->db;
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }
    }

    /**
     * Simple getter for the db attribute.
     *
     * @return object|null The db attribute
     */
    public function getDb()
    {
        return $this->db;
    }

    /**
     * Add this chunk to the chunk array attribute.
     *
     * @param mixed $chunk Chunk of text (IndexableChunk instance)
     */
    public function addChunk($chunk): void
    {
        $this->chunks[] = $chunk;
    }

    /**
     * Actually index the current data.
     *
     * NOTE: only the first queued chunk is indexed. This mirrors the
     * historical behaviour (the loop returned during its first iteration) and
     * is kept so that callers do not suddenly get extra documents.
     *
     * @return int|null New Xapian document ID or null upon failure
     */
    public function index()
    {
        if (empty($this->chunks) || !$this->ensureDb()) {
            return null;
        }

        $chunk = reset($this->chunks);

        try {
            $doc = new XapianDocument();
            $this->indexer->set_document($doc);

            if (!empty($chunk->terms)) {
                foreach ($chunk->terms as $term) {
                    // @todo consider using a proper weight value instead of 1
                    $doc->add_term($term['flag'] . $term['name'], 1);
                }
            }

            // Free-form index all data array (title, content, etc.)
            if (!empty($chunk->data)) {
                foreach ($chunk->data as $value) {
                    $this->indexer->index_text($value, 1);
                }
            }

            // set_data() takes the payload only.
            $doc->set_data($chunk->xapian_data);
            $did = $this->db->add_document($doc);

            // Make sure changes are flushed to disk
            $this->db->flush();

            return $did;
        } catch (Exception $e) {
            // Report and return null (as documented) instead of killing the request.
            echo Display::return_message($e->getMessage(), 'error');

            return null;
        }
    }

    /**
     * Get a specific document from Xapian db.
     *
     * @param int $did Xapian::docid
     *
     * @return XapianDocument|false XapianDocument, or false on error
     */
    public function get_document($did)
    {
        if (!$this->ensureDb()) {
            return false;
        }

        try {
            return $this->db->get_document($did);
        } catch (Exception $e) {
            // Intentionally silent here: caller will handle false result.
            return false;
        }
    }

    /**
     * Get document data on a Xapian document.
     *
     * @param XapianDocument $doc Xapian document to read from
     *
     * @return mixed Xapian document data or false if error
     */
    public function get_document_data($doc)
    {
        // Kept for backward compatibility: this method has always opened the
        // database as a side effect, even though it only reads from $doc.
        $this->ensureDb();

        if (!$doc instanceof XapianDocument) {
            return false;
        }

        try {
            return $doc->get_data();
        } catch (Exception $e) {
            // Intentionally silent here: caller will handle false result.
            return false;
        }
    }

    /**
     * Replace all terms of a document in Xapian db.
     *
     * @param int    $did    Xapian::docid
     * @param array  $terms  New terms of the document
     * @param string $prefix Prefix used to categorize the doc
     *                       (usually 'T' for title, 'A' for author)
     *
     * @return bool false on error
     */
    public function update_terms($did, $terms, $prefix): bool
    {
        $doc = $this->get_document($did);
        if ($doc === false) {
            return false;
        }

        try {
            $doc->clear_terms();

            foreach ($terms as $term) {
                // Add directly with given prefix
                $doc->add_term($prefix . $term, 1);
            }

            $this->db->replace_document($did, $doc);
            $this->db->flush();
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return true;
    }

    /**
     * Remove a document from Xapian db.
     *
     * Does nothing when the database cannot be opened, when the id is not a
     * positive integer or when the document cannot be fetched.
     *
     * @param int $did Xapian::docid
     */
    public function remove_document($did): void
    {
        if (!$this->ensureDb()) {
            return;
        }

        $did = (int) $did;
        if ($did <= 0) {
            return;
        }

        if ($this->get_document($did) === false) {
            return;
        }

        $this->db->delete_document($did);
        $this->db->flush();
    }

    /**
     * Adds a term to the document specified.
     *
     * @param string         $term The term to add
     * @param XapianDocument $doc  The Xapian document where to add the term
     *
     * @return XapianDocument|false XapianDocument, or false on error
     */
    public function add_term_to_doc($term, $doc)
    {
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        try {
            $doc->add_term($term);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return $doc;
    }

    /**
     * Remove a term from the document specified.
     *
     * @param string         $term The term to remove
     * @param XapianDocument $doc  The Xapian document where to remove the term
     *
     * @return XapianDocument|false XapianDocument, or false on error
     */
    public function remove_term_from_doc($term, $doc)
    {
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        try {
            $doc->remove_term($term);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return $doc;
    }

    /**
     * Replace a document in the actual db.
     *
     * @param XapianDocument $doc Xapian document to push into the db
     * @param int            $did Xapian document id of the document to replace
     *
     * @return XapianDocument|false The document that was written, or false on error
     */
    public function replace_document($doc, $did)
    {
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        if (!$this->ensureDb()) {
            return false;
        }

        try {
            $this->db->replace_document((int) $did, $doc);
            $this->db->flush();
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return $doc;
    }

    /**
     * Work out the default index directory when the caller gave none.
     *
     * @return string|null Directory path, or null when neither the Symfony
     *                     resolver nor the legacy upload path is available
     */
    private function resolveDefaultIndexPath()
    {
        // Chamilo 2: prefer the Symfony resolver (var/search).
        if (class_exists(Container::class)) {
            try {
                /** @var SearchIndexPathResolver $resolver */
                $resolver = Container::getSearchIndexPathResolver();
                $resolver->ensureIndexDirectoryExists();

                return $resolver->getIndexDir();
            } catch (\Throwable $e) {
                // Resolver or container not usable: fall through to the legacy path.
            }
        }

        // Legacy default path (Chamilo 1). The constant may not be declared,
        // and reading an undefined constant is a fatal error on PHP 8.
        if (defined('SYS_UPLOAD_PATH') && function_exists('api_get_path')) {
            return api_get_path(constant('SYS_UPLOAD_PATH')) . 'plugins/xapian/searchdb/';
        }

        return null;
    }

    /**
     * Open the database with default settings if it is not open yet.
     *
     * @return bool true when a database handle is available afterwards
     */
    private function ensureDb(): bool
    {
        if ($this->db === null) {
            $this->connectDb();
        }

        return is_object($this->db);
    }
}
385