Passed
Pull Request — master (#7159)
by
unknown
09:00
created

XapianIndexer::connectDb()   B

Complexity

Conditions 8
Paths 109

Size

Total Lines 44
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 23
c 0
b 0
f 0
nc 109
nop 3
dl 0
loc 44
rs 8.3694
1
<?php
2
/* For licensing terms, see /license.txt */
3
4
// @todo add setting to add xapian.php
5
// require_once 'xapian.php';
6
7
use Chamilo\CoreBundle\Framework\Container;
8
use Chamilo\CoreBundle\Search\Xapian\SearchIndexPathResolver;
9
10
/**
 * Abstract helper class wrapping a writable Xapian database.
 *
 * It lazily opens the index, keeps the term generator and stemmer used while
 * indexing, queues chunks to be indexed, and offers small helpers to read,
 * update and delete Xapian documents.
 */
abstract class XapianIndexer
{
    /** @var XapianTermGenerator|null Term generator, set by connectDb() */
    public $indexer;

    /** @var XapianStem|null Stemmer attached to the term generator */
    public $stemmer;

    /** @var XapianWritableDatabase|null Database handle, null until connectDb() succeeds */
    protected $db;

    /** @var IndexableChunk[] Chunks queued through addChunk() */
    protected $chunks = [];

    /**
     * Class constructor: starts without any database handle or stemmer.
     */
    public function __construct()
    {
        $this->db = null;
        $this->stemmer = null;
    }

    /**
     * Class destructor: drops the references to the database and the stemmer.
     */
    public function __destruct()
    {
        unset($this->db, $this->stemmer);
    }

    /**
     * Generates a list of languages Xapian manages.
     *
     * This method enables the definition of more matches between
     * Chamilo languages and Xapian languages (through hardcoding).
     *
     * @return array Array of language codes -> Xapian language names
     */
    final public function xapian_languages()
    {
        /* http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html */
        return [
            'none' => 'none', // do not stem terms
            'da' => 'danish',
            'nl' => 'dutch',
            /* Martin Porter's 2002 revision of his stemmer */
            'en' => 'english',
            /* Lovin's stemmer */
            'lovins' => 'english_lovins',
            /* Porter's stemmer as described in his 1980 paper */
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        ];
    }

    /**
     * Connect to the database, and create it if it does not exist.
     *
     * When no path is given, the Symfony SearchIndexPathResolver is tried
     * first and the legacy upload path is only used as a fallback.
     *
     * The handle, term generator and stemmer are only stored on the instance
     * once all of them were created, so a failure never leaves the object
     * half-initialised.
     *
     * @param string|null $path   Index directory, or null to resolve the default one
     * @param int|null    $dbMode Xapian open mode, defaults to Xapian::DB_CREATE_OR_OPEN
     * @param string      $lang   Xapian language name (a value of xapian_languages());
     *                            anything else falls back to 'english'
     *
     * @return XapianWritableDatabase|int The database handle, or 1 on failure
     *                                    (legacy error code, after printing the error)
     */
    public function connectDb($path = null, $dbMode = null, $lang = 'english')
    {
        if ($this->db !== null) {
            return $this->db;
        }

        if ($dbMode === null) {
            $dbMode = Xapian::DB_CREATE_OR_OPEN;
        }

        try {
            if ($path === null) {
                $path = $this->resolveDefaultIndexPath();
            }

            $database = new XapianWritableDatabase($path, $dbMode);
            $termGenerator = new XapianTermGenerator();

            // Only Xapian language names are accepted here, not the array keys.
            if (!in_array($lang, $this->xapian_languages(), true)) {
                $lang = 'english';
            }

            $stemmer = new XapianStem($lang);
            $termGenerator->set_stemmer($stemmer);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }

        // Publish the fully built objects in one go.
        $this->db = $database;
        $this->indexer = $termGenerator;
        $this->stemmer = $stemmer;

        return $this->db;
    }

    /**
     * Simple getter for the db attribute.
     *
     * @return object|null The db attribute
     */
    public function getDb()
    {
        return $this->db;
    }

    /**
     * Add this chunk to the chunk array attribute.
     *
     * @param mixed $chunk Chunk of text (IndexableChunk instance)
     */
    public function addChunk($chunk): void
    {
        $this->chunks[] = $chunk;
    }

    /**
     * Actually index the current data.
     *
     * Only the first queued chunk is indexed: the method returns a single
     * document id, which is the historical behaviour callers rely on.
     * The database is opened lazily, like in the other helpers of this class.
     *
     * @return int|null New Xapian document ID or null upon failure
     */
    public function index()
    {
        if (empty($this->chunks)) {
            return null;
        }

        if ($this->db === null) {
            $this->connectDb();
        }

        // connectDb() already reported the error when it could not connect.
        if ($this->db === null || $this->indexer === null) {
            return null;
        }

        try {
            $chunk = reset($this->chunks);

            $doc = new XapianDocument();
            $this->indexer->set_document($doc);

            if (!empty($chunk->terms)) {
                foreach ($chunk->terms as $term) {
                    // @todo consider using a proper weight value instead of 1
                    $doc->add_term($term['flag'].$term['name'], 1);
                }
            }

            // Free-form index all data array (title, content, etc.)
            if (!empty($chunk->data)) {
                foreach ($chunk->data as $value) {
                    $this->indexer->index_text($value, 1);
                }
            }

            // set_data() takes the payload only.
            $doc->set_data($chunk->xapian_data);
            $did = $this->db->add_document($doc);

            // Make sure changes are flushed to disk
            $this->db->flush();

            return $did;
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            // Report the failure to the caller instead of killing the request.
            return null;
        }
    }

    /**
     * Get a specific document from Xapian db.
     *
     * @param int $did Xapian::docid
     *
     * @return XapianDocument|false XapianDocument, or false on error
     */
    public function get_document($did)
    {
        if ($this->db === null) {
            $this->connectDb();
        }

        // The connection failed: there is nothing to read from.
        if ($this->db === null) {
            return false;
        }

        try {
            $document = $this->db->get_document($did);
        } catch (Exception $e) {
            // Intentionally silent here: caller will handle false result.
            return false;
        }

        return $document;
    }

    /**
     * Get document data on a Xapian document.
     *
     * @param XapianDocument $doc Xapian document to read from
     *
     * @return mixed Xapian document data or false if error
     */
    public function get_document_data($doc)
    {
        // Reject invalid input before opening the database.
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        // Kept for backward compatibility: this helper has always opened the
        // database as a side effect, even though it only reads from $doc.
        if ($this->db === null) {
            $this->connectDb();
        }

        try {
            return $doc->get_data();
        } catch (Exception $e) {
            // Intentionally silent here: caller will handle false result.
            return false;
        }
    }

    /**
     * Replace all terms of a document in Xapian db.
     *
     * @param int    $did    Xapian::docid
     * @param array  $terms  New terms of the document
     * @param string $prefix Prefix used to categorize the doc
     *                       (usually 'T' for title, 'A' for author)
     *
     * @return bool false on error
     */
    public function update_terms($did, $terms, $prefix): bool
    {
        $doc = $this->get_document($did);
        if ($doc === false) {
            return false;
        }

        try {
            $doc->clear_terms();

            foreach ($terms as $term) {
                // Add directly with given prefix
                $doc->add_term($prefix.$term, 1);
            }

            $this->db->replace_document($did, $doc);
            $this->db->flush();
        } catch (Exception $e) {
            // Intentionally silent here: caller will handle false result.
            return false;
        }

        return true;
    }

    /**
     * Remove a document from Xapian db.
     *
     * Ids that are not strictly positive are ignored without opening the
     * database; unknown ids are ignored as well.
     *
     * @param int $did Xapian::docid
     */
    public function remove_document($did): void
    {
        $did = (int) $did;
        if ($did <= 0) {
            return;
        }

        // get_document() connects lazily and returns false when the document
        // (or the database) is not available.
        if ($this->get_document($did) === false) {
            return;
        }

        $this->db->delete_document($did);
        $this->db->flush();
    }

    /**
     * Adds a term to the document specified.
     *
     * @param string         $term The term to add
     * @param XapianDocument $doc  The Xapian document where to add the term
     *
     * @return XapianDocument|false|int The document on success, false when $doc
     *                                  is not a XapianDocument, 1 when Xapian raised
     *                                  an exception (legacy error code)
     */
    public function add_term_to_doc($term, $doc)
    {
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        try {
            $doc->add_term($term);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }

        return $doc;
    }

    /**
     * Remove a term from the document specified.
     *
     * @param string         $term The term to remove
     * @param XapianDocument $doc  The Xapian document where to remove the term
     *
     * @return XapianDocument|false|int The document on success, false when $doc
     *                                  is not a XapianDocument, 1 when Xapian raised
     *                                  an exception (legacy error code)
     */
    public function remove_term_from_doc($term, $doc)
    {
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        try {
            $doc->remove_term($term);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }

        return $doc;
    }

    /**
     * Replace a document in the actual db.
     *
     * @param XapianDocument $doc Xapian document to push into the db
     * @param int            $did Xapian document id of the document to replace
     *
     * @return XapianDocument|false|int The document on success, false when $doc
     *                                  is not a XapianDocument, 1 when the database
     *                                  is unavailable or Xapian raised an exception
     */
    public function replace_document($doc, $did)
    {
        if (!$doc instanceof XapianDocument) {
            return false;
        }

        if ($this->db === null) {
            $this->connectDb();
        }

        $database = $this->getDb();
        if ($database === null) {
            // connectDb() already printed the reason of the failure.
            return 1;
        }

        try {
            $database->replace_document((int) $did, $doc);
            $database->flush();
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }

        return $doc;
    }

    /**
     * Resolve the index directory used when connectDb() receives no path.
     *
     * The Symfony resolver (var/search) is preferred. The legacy upload path
     * is only built when the resolver is missing or fails, so an undefined
     * SYS_UPLOAD_PATH constant cannot break installs where the resolver works.
     *
     * @return string Index directory
     *
     * @throws RuntimeException When neither the resolver nor the legacy constant is usable
     */
    private function resolveDefaultIndexPath()
    {
        if (class_exists(Container::class)) {
            try {
                /** @var SearchIndexPathResolver $resolver */
                $resolver = Container::getSearchIndexPathResolver();
                $resolver->ensureIndexDirectoryExists();

                return $resolver->getIndexDir();
            } catch (\Throwable $e) {
                // Fallback to legacy path if resolver or container are not available.
                // This keeps backward compatibility and avoids hard failures.
            }
        }

        if (!defined('SYS_UPLOAD_PATH')) {
            throw new RuntimeException(
                'Unable to resolve the Xapian index directory: the path resolver is unavailable and SYS_UPLOAD_PATH is not defined.'
            );
        }

        // Legacy default path (Chamilo 1)
        return api_get_path(constant('SYS_UPLOAD_PATH')).'plugins/xapian/searchdb/';
    }
}
377