Passed
Push — master ( 20a0f4...09a1a7 )
by Angel Fernando Quiroz
09:25
created

XapianIndexer::index()   B

Complexity

Conditions 8
Paths 44

Size

Total Lines 37
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
eloc 20
c 1
b 0
f 0
nc 44
nop 0
dl 0
loc 37
rs 8.4444
1
<?php
2
3
declare(strict_types=1);
4
5
/* For licensing terms, see /license.txt */
6
7
namespace Chamilo\CoreBundle\Search\Xapian;
8
9
use Exception;
10
use RuntimeException;
11
use Xapian;
12
use XapianDocument;
13
use XapianStem;
14
use XapianTermGenerator;
15
use XapianWritableDatabase;
16
17
/**
 * Base Xapian indexer for Chamilo 2.
 *
 * This is a modernized version of the legacy XapianIndexer from Chamilo 1,
 * adapted to namespaces and DI.
 *
 * The writable database is opened lazily: every method that needs it goes
 * through connectDb(), which caches the handle, the term generator and the
 * stemmer on the instance.
 */
abstract class XapianIndexer
{
    /**
     * Term generator used for free-text indexing; set by connectDb().
     */
    protected ?XapianTermGenerator $indexer = null;

    /**
     * Stemmer attached to the term generator; set by connectDb().
     */
    protected ?XapianStem $stemmer = null;

    /**
     * Cached writable database handle; null until connectDb() succeeds.
     */
    protected ?XapianWritableDatabase $db = null;

    /**
     * Chunks queued by addChunk() and consumed by index().
     *
     * @var array<int,object>
     */
    protected array $chunks = [];

    /**
     * @param SearchIndexPathResolver $indexPathResolver Provides the default index directory
     */
    public function __construct(
        private readonly SearchIndexPathResolver $indexPathResolver,
    ) {
        // Defer DB opening until first use.
    }

    /**
     * Drops the references to the Xapian handles held by this instance.
     */
    public function __destruct()
    {
        unset($this->db, $this->stemmer, $this->indexer);
    }

    /**
     * Returns the list of languages supported by Xapian.
     *
     * @return array<string,string> Language codes -> Xapian languages
     */
    final public function getSupportedLanguages(): array
    {
        return [
            'none' => 'none',
            'da' => 'danish',
            'nl' => 'dutch',
            'en' => 'english',
            'lovins' => 'english_lovins',
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        ];
    }

    /**
     * Connect to the Xapian writable database, creating it when needed.
     *
     * The handle is cached: once a connection exists, later calls return it
     * and ignore their arguments.
     *
     * @param string|null $path   Index location; defaults to the resolver's index directory
     * @param int|null    $dbMode Xapian open mode; defaults to Xapian::DB_CREATE_OR_OPEN
     * @param string      $lang   Stemmer language, given either as a code or as a Xapian
     *                            language name from getSupportedLanguages(); anything
     *                            else falls back to "english"
     *
     * @throws RuntimeException when the DB cannot be created or opened
     */
    public function connectDb(?string $path = null, ?int $dbMode = null, string $lang = 'english'): XapianWritableDatabase
    {
        require_once 'xapian.php';

        if ($this->db instanceof XapianWritableDatabase) {
            return $this->db;
        }

        $dbMode ??= Xapian::DB_CREATE_OR_OPEN;
        $path ??= $this->indexPathResolver->getIndexDir();

        // NOTE(review): this is called even when a custom $path is given; confirm
        // whether the resolver's own directory is really needed in that case.
        $this->indexPathResolver->ensureIndexDirectoryExists();

        try {
            // Build everything in locals first so that a failure half-way through
            // never leaves a cached database without its term generator.
            $db = new XapianWritableDatabase($path, $dbMode);
            $indexer = new XapianTermGenerator();
            $stemmer = new XapianStem($this->resolveStemmerLanguage($lang));
            $indexer->set_stemmer($stemmer);
        } catch (Exception $e) {
            throw new RuntimeException(\sprintf('Unable to create or open Xapian index at "%s": %s', $path, $e->getMessage()), 0, $e);
        }

        $this->db = $db;
        $this->indexer = $indexer;
        $this->stemmer = $stemmer;

        return $db;
    }

    /**
     * Simple getter for the writable database.
     *
     * @return XapianWritableDatabase|null The cached handle, or null when not connected yet
     */
    public function getDb(): ?XapianWritableDatabase
    {
        return $this->db;
    }

    /**
     * Add a chunk of indexable data to the batch.
     *
     * @param object $chunk Generic DTO with: terms[], data[], xapian_data
     */
    public function addChunk(object $chunk): void
    {
        $this->chunks[] = $chunk;
    }

    /**
     * Index the current batch of chunks.
     *
     * Every queued chunk becomes one Xapian document. The batch is emptied
     * before indexing starts, so a later call never re-indexes chunks that
     * were already handed to a previous call (even if that call failed).
     *
     * @return int|null Xapian document ID of the first chunk in the batch,
     *                  or null when nothing was indexed
     *
     * @throws RuntimeException when the index cannot be opened or written
     */
    public function index(): ?int
    {
        if ([] === $this->chunks) {
            return null;
        }

        $db = $this->connectDb();
        $indexer = $this->indexer;
        if (null === $indexer) {
            // Without a term generator the free-text data would be silently skipped.
            throw new RuntimeException('Failed to index chunk in Xapian: term generator is not initialized.');
        }

        $batch = $this->chunks;
        $this->chunks = [];

        $firstDocId = null;

        try {
            foreach ($batch as $chunk) {
                $doc = new XapianDocument();
                $indexer->set_document($doc);

                if (!empty($chunk->terms)) {
                    foreach ($chunk->terms as $term) {
                        $doc->add_term($term['flag'].$term['name'], 1);
                    }
                }

                if (!empty($chunk->data)) {
                    foreach ($chunk->data as $value) {
                        $indexer->index_text((string) $value, 1);
                    }
                }

                // set_data() takes the payload as its only argument.
                $doc->set_data($chunk->xapian_data);

                $docId = $db->add_document($doc);
                $firstDocId ??= $docId;
            }

            // Write the whole batch to disk in one go.
            $db->flush();
        } catch (Exception $e) {
            throw new RuntimeException(\sprintf('Failed to index chunk in Xapian: %s', $e->getMessage()), 0, $e);
        }

        return $firstDocId;
    }

    /**
     * Fetch a document by its Xapian docid.
     *
     * @return XapianDocument|null The document, or null when the lookup throws
     *
     * @throws RuntimeException when the index cannot be opened
     */
    public function getDocument(int $did): ?XapianDocument
    {
        $db = $this->db ?? $this->connectDb();

        try {
            return $db->get_document($did) ?: null;
        } catch (Exception) {
            // Best-effort lookup: any lookup failure is reported as "no document".
            return null;
        }
    }

    /**
     * Update all terms of a document in the index.
     *
     * The document's existing terms are cleared and replaced by the given
     * ones, each prepended with $prefix.
     *
     * @param int    $did    Xapian docid
     * @param array  $terms  New terms (strings)
     * @param string $prefix Prefix used to categorize the terms
     *
     * @return bool False when the document cannot be fetched, true once it was rewritten
     */
    public function updateTerms(int $did, array $terms, string $prefix): bool
    {
        $doc = $this->getDocument($did);
        if (null === $doc) {
            return false;
        }

        $doc->clear_terms();

        foreach ($terms as $term) {
            $doc->add_term($prefix.$term, 1);
        }

        // Resolve the handle explicitly so success is never reported without a write.
        $db = $this->db ?? $this->connectDb();
        $db->replace_document($did, $doc);
        $db->flush();

        return true;
    }

    /**
     * Remove a document from the index.
     *
     * Non-positive docids and documents that cannot be fetched are ignored.
     */
    public function removeDocument(int $did): void
    {
        // Reject invalid ids before touching (and possibly creating) the index.
        if ($did <= 0) {
            return;
        }

        if (null === $this->getDocument($did)) {
            return;
        }

        $db = $this->db ?? $this->connectDb();
        $db->delete_document($did);
        $db->flush();
    }

    /**
     * Replace a document in the index.
     *
     * @param XapianDocument $doc Replacement document
     * @param int            $did Xapian docid to overwrite
     */
    public function replaceDocument(XapianDocument $doc, int $did): void
    {
        $db = $this->db ?? $this->connectDb();

        $db->replace_document($did, $doc);
        $db->flush();
    }

    /**
     * Maps a requested language to a Xapian stemmer language name.
     *
     * Accepts a key of getSupportedLanguages() (e.g. "fr") or one of its
     * values (e.g. "french"); unknown input yields "english".
     */
    private function resolveStemmerLanguage(string $lang): string
    {
        $supported = $this->getSupportedLanguages();

        if (isset($supported[$lang])) {
            return $supported[$lang];
        }

        return \in_array($lang, $supported, true) ? $lang : 'english';
    }
}
253