Passed
Pull Request — master (#7159)
by
unknown
09:00
created

XapianIndexer::addChunk()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 3
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
/* For licensing terms, see /license.txt */
6
7
namespace Chamilo\CoreBundle\Search\Xapian;
8
9
use RuntimeException;
10
11
/**
12
 * Base Xapian indexer for Chamilo 2.
13
 *
14
 * This is a modernized version of the legacy XapianIndexer from Chamilo 1,
15
 * adapted to namespaces and DI.
16
 */
17
abstract class XapianIndexer
18
{
19
    /** Term generator used to index free text; created by connectDb(). */
    protected ?\XapianTermGenerator $indexer = null;

    /** Stemmer attached to the term generator; created by connectDb(). */
    protected ?\XapianStem $stemmer = null;

    /** Writable index handle; null until connectDb() has opened it. */
    protected ?\XapianWritableDatabase $db = null;

    /**
     * Chunks queued through addChunk() for index().
     *
     * @var array<int,object>
     */
    protected array $chunks = [];
30
31
    /**
     * The writable database is not opened here; connectDb() opens it on demand.
     *
     * @param SearchIndexPathResolver $indexPathResolver Supplies the index directory used by connectDb()
     */
    public function __construct(
        private readonly SearchIndexPathResolver $indexPathResolver,
    ) {}
36
37
    /**
     * Drops the Xapian handles held by this instance: database first,
     * then the stemmer, then the term generator.
     */
    public function __destruct()
    {
        unset($this->db);
        unset($this->stemmer);
        unset($this->indexer);
    }
41
42
    /**
     * Lists the stemming languages this indexer accepts.
     *
     * The values are the names connectDb() checks its $lang argument against
     * before handing it to \XapianStem; the keys are short aliases for them.
     *
     * @return array<string,string> Short code => Xapian language name
     */
    final public function getSupportedLanguages(): array
    {
        $languages = [
            'none' => 'none',
            'da' => 'danish',
            'nl' => 'dutch',
            'en' => 'english',
            'lovins' => 'english_lovins',
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        ];

        return $languages;
    }
67
68
    /**
     * Connect to the Xapian writable database, creating it when needed.
     *
     * The connection is cached: once it exists, later calls return the same
     * handle and ignore their arguments. The database, term generator and
     * stemmer are stored on the instance together, only after all three were
     * built successfully, so a failure never leaves a half-initialised indexer.
     *
     * @param string|null $path   Index location; defaults to the resolver's index directory
     * @param int|null    $dbMode Xapian open mode; defaults to \Xapian::DB_CREATE_OR_OPEN
     * @param string      $lang   Xapian stemmer language name (a value of getSupportedLanguages());
     *                            any other value falls back to 'english'
     *
     * @throws RuntimeException When the DB cannot be created or opened.
     */
    public function connectDb(?string $path = null, ?int $dbMode = null, string $lang = 'english'): \XapianWritableDatabase
    {
        require_once 'xapian.php';

        if ($this->db instanceof \XapianWritableDatabase) {
            return $this->db;
        }

        $dbMode ??= \Xapian::DB_CREATE_OR_OPEN;
        $path ??= $this->indexPathResolver->getIndexDir();

        // NOTE(review): this also runs when a custom $path is given; presumably it
        // only prepares the resolver's own directory — confirm against the resolver.
        $this->indexPathResolver->ensureIndexDirectoryExists();

        // Only Xapian language names (the values of the map) are accepted here.
        if (!\in_array($lang, $this->getSupportedLanguages(), true)) {
            $lang = 'english';
        }

        try {
            // Build everything in locals first: if any step throws, the properties
            // keep their previous (null) state and the next call retries from scratch
            // instead of returning a database without a term generator.
            $db = new \XapianWritableDatabase($path, $dbMode);
            $indexer = new \XapianTermGenerator();
            $stemmer = new \XapianStem($lang);
            $indexer->set_stemmer($stemmer);
        } catch (\Exception $e) {
            throw new RuntimeException(
                \sprintf('Unable to create or open Xapian index at "%s": %s', $path, $e->getMessage()),
                0,
                $e
            );
        }

        $this->db = $db;
        $this->indexer = $indexer;
        $this->stemmer = $stemmer;

        // Return the local, which is known to be non-null, rather than the nullable property.
        return $db;
    }
112
113
    /**
     * Returns the writable database handle currently held by this instance.
     *
     * This never opens a connection by itself.
     *
     * @return \XapianWritableDatabase|null Null while no connection has been opened
     */
    public function getDb(): ?\XapianWritableDatabase
    {
        $handle = $this->db;

        return $handle;
    }
120
121
    /**
     * Queues one chunk of indexable data at the end of the current batch.
     *
     * Nothing is written to the index here; index() reads the queue.
     *
     * @param object $chunk Generic DTO with: terms[], data[], xapian_data
     */
    public function addChunk(object $chunk): void
    {
        \array_push($this->chunks, $chunk);
    }
130
131
    /**
     * Index the current batch of chunks.
     *
     * Every queued chunk becomes one Xapian document: its terms are added
     * verbatim (flag followed by name), its data values are passed through the
     * term generator, and its xapian_data is stored as the document payload.
     * The queue is emptied as soon as indexing starts, so a later call never
     * indexes the same chunk again, even when this call fails.
     *
     * @return int|null Docid of the first document added, or null when the batch was empty
     *
     * @throws RuntimeException When the index cannot be opened or a chunk cannot be written.
     */
    public function index(): ?int
    {
        if (empty($this->chunks)) {
            return null;
        }

        $db = $this->connectDb();

        $indexer = $this->indexer;
        if ($indexer === null) {
            // Without a term generator the data values would be skipped silently,
            // producing documents that cannot be found by their text.
            throw new RuntimeException('Failed to index chunk in Xapian: term generator is not initialized.');
        }

        // Take ownership of the batch so that it is consumed exactly once.
        $batch = $this->chunks;
        $this->chunks = [];

        $firstDocId = null;

        try {
            foreach ($batch as $chunk) {
                $doc = new \XapianDocument();
                $indexer->set_document($doc);

                if (!empty($chunk->terms)) {
                    foreach ($chunk->terms as $term) {
                        $doc->add_term($term['flag'] . $term['name'], 1);
                    }
                }

                if (!empty($chunk->data)) {
                    foreach ($chunk->data as $value) {
                        $indexer->index_text((string) $value, 1);
                    }
                }

                // Xapian's Document::set_data() takes the payload only.
                $doc->set_data($chunk->xapian_data);

                $docId = $db->add_document($doc);
                // Callers that queue a single chunk keep receiving that chunk's docid.
                $firstDocId ??= $docId;
            }

            // One flush for the whole batch writes all new documents to disk.
            $db->flush();
        } catch (\Exception $e) {
            throw new RuntimeException(
                sprintf('Failed to index chunk in Xapian: %s', $e->getMessage()),
                0,
                $e
            );
        }

        return $firstDocId;
    }
178
179
    /**
     * Looks up a document by its Xapian docid, connecting to the index first
     * when no connection is open yet.
     *
     * Any exception raised by the lookup itself is swallowed and reported as
     * null; a failure to connect still propagates from connectDb().
     *
     * @return \XapianDocument|null The document, or null when the lookup failed
     */
    public function getDocument(int $did): ?\XapianDocument
    {
        if (null === $this->db) {
            $this->connectDb();
        }

        $db = $this->db;
        if (null === $db) {
            return null;
        }

        try {
            $found = $db->get_document($did);
        } catch (\Exception) {
            return null;
        }

        return $found ?: null;
    }
194
195
    /**
     * Replaces the terms of an indexed document.
     *
     * All terms currently attached to the document are cleared before the new
     * ones are added, then the document is written back and flushed.
     *
     * @param int    $did    Xapian docid
     * @param array  $terms  New terms (strings); each is stored as $prefix . $term
     * @param string $prefix Prefix used to categorize the terms
     *
     * @return bool False when the document could not be fetched, true otherwise
     */
    public function updateTerms(int $did, array $terms, string $prefix): bool
    {
        $document = $this->getDocument($did);
        if (null === $document) {
            return false;
        }

        $document->clear_terms();
        foreach ($terms as $name) {
            $document->add_term($prefix . $name, 1);
        }

        $db = $this->db;
        $db?->replace_document($did, $document);
        $db?->flush();

        return true;
    }
220
221
    /**
     * Remove a document from the index.
     *
     * Does nothing when $did is not a positive docid or when no such document
     * can be fetched. Invalid ids are rejected before any connection is made,
     * so they never cause the writable index to be opened or created.
     *
     * @param int $did Xapian docid
     *
     * @throws RuntimeException When the index cannot be opened.
     */
    public function removeDocument(int $did): void
    {
        // Reject invalid ids first: there is nothing to look up for them.
        if ($did <= 0) {
            return;
        }

        if ($this->db === null) {
            $this->connectDb();
        }

        // Only delete documents that can actually be fetched.
        if ($this->getDocument($did) === null) {
            return;
        }

        $this->db?->delete_document($did);
        $this->db?->flush();
    }
242
243
    /**
     * Overwrites the document stored under the given docid and flushes the
     * change, connecting to the index first when no connection is open yet.
     *
     * @param \XapianDocument $doc Replacement document
     * @param int             $did Xapian docid to overwrite
     */
    public function replaceDocument(\XapianDocument $doc, int $did): void
    {
        if (!$this->db instanceof \XapianWritableDatabase) {
            $this->connectDb();
        }

        $db = $this->db;
        $db?->replace_document($did, $doc);
        $db?->flush();
    }
255
}
256