XapianIndexService::indexDocument()   F
last analyzed

Complexity

Conditions 15
Paths 289

Size

Total Lines 93
Code Lines 50

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 15
eloc 50
nc 289
nop 4
dl 0
loc 93
rs 3.9708
c 1
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
declare(strict_types=1);
4
5
/* For licensing terms, see /license.txt */
6
7
namespace Chamilo\CoreBundle\Search\Xapian;
8
9
use Chamilo\CoreBundle\Entity\SearchEngineRef;
10
use DateTimeImmutable;
11
use Doctrine\ORM\EntityManagerInterface;
12
use RuntimeException;
13
use Throwable;
14
use Xapian;
15
use XapianDocument;
16
use XapianStem;
17
use XapianTermGenerator;
18
use XapianWritableDatabase;
19
20
use const DATE_ATOM;
21
22
/**
 * Service responsible for indexing documents into the Xapian database.
 *
 * Each write operation opens the writable database found in the directory
 * returned by SearchIndexPathResolver::getIndexDir(), applies the change and
 * flushes it immediately.
 */
final class XapianIndexService
{
    private const DEFAULT_LANGUAGE = 'english';

    /**
     * Language code => Xapian stemmer language name.
     *
     * The values double as the list of stemmer names accepted verbatim by
     * mapLanguageToXapianStemmer().
     */
    private const STEMMER_BY_ISO_CODE = [
        'en' => 'english',
        'es' => 'spanish',
        'fr' => 'french',
        'pt' => 'portuguese',
        'it' => 'italian',
        'de' => 'german',
        'nl' => 'dutch',
        'sv' => 'swedish',
        'no' => 'norwegian',
        'nb' => 'norwegian',
        'nn' => 'norwegian',
        'da' => 'danish',
        'fi' => 'finnish',
        'ru' => 'russian',
        'ar' => 'arabic',
        'el' => 'greek',
        'tr' => 'turkish',
        'ro' => 'romanian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
    ];

    public function __construct(
        private readonly SearchIndexPathResolver $indexPathResolver,
        private readonly EntityManagerInterface $em,
    ) {}

    /**
     * Indexes a simple demo document so we can verify that search works end-to-end.
     *
     * @return int The Xapian internal document id
     *
     * @throws RuntimeException When indexing fails
     */
    public function indexDemoDocument(): int
    {
        $now = new DateTimeImmutable('now');

        $fields = [
            'title' => 'Demo test document',
            'content' => 'This is a test document indexed from XapianIndexService in Chamilo 2.',
            'created_at' => $now->format(DATE_ATOM),
        ];

        $terms = [
            'XTdemo',
            'XTchamilo',
        ];

        return $this->indexDocument($fields, $terms);
    }

    /**
     * Indexes a generic document.
     *
     * Values that cannot be converted to a string (arrays, objects without
     * __toString(), ...) are ignored for text indexing instead of triggering
     * conversion warnings or errors; they are still kept in the stored data.
     *
     * @param array<string,mixed> $fields            Arbitrary data to store and index as free-text
     * @param string[]            $terms             Optional list of additional terms to add to the document
     * @param string|null         $language          Language used for stemming (defaults to english)
     * @param array<string,mixed> $fieldValuesByCode Fielded values, e.g. ['k' => '...', 't' => '...']
     *
     * @return int The Xapian internal document id
     *
     * @throws RuntimeException When the Xapian extension is missing or Xapian fails during indexing
     */
    public function indexDocument(
        array $fields,
        array $terms = [],
        ?string $language = null,
        array $fieldValuesByCode = [] // e.g. ['t'=>'...', 'd'=>'...', 'k'=>'...']
    ): int {
        $this->assertXapianIsLoaded();

        $doc = new XapianDocument();
        $termGen = new XapianTermGenerator();
        $termGen->set_stemmer($this->createStemmer($language));
        $termGen->set_document($doc);

        // Unprefixed free-text (general search)
        $this->indexFreeText($termGen, $fields);

        // Prefixed dynamic fields: t:, d:, k:, etc.
        if ([] !== $fieldValuesByCode) {
            error_log('[Xapian] indexDocument: fieldValuesByCode='.json_encode(array_keys($fieldValuesByCode)));

            $this->indexPrefixedFields($termGen, $fieldValuesByCode);

            // Optional: keep it in stored data for debugging
            $fields['searchFieldValues'] = $fieldValuesByCode;
        }

        // Extra terms (Tdocument, Cxx, Sxx...)
        $this->addExtraTerms($doc, $terms);

        $doc->set_data(serialize($fields));

        try {
            // Opened inside the try block so that open failures are reported
            // as RuntimeException too, as the contract above promises.
            $db = $this->openWritableDatabase();
            $docId = $db->add_document($doc);
            $db->flush();

            error_log('[Xapian] indexDocument: added docId='.$docId);

            return $docId;
        } catch (Throwable $e) {
            throw new RuntimeException(\sprintf('Failed to index document in Xapian: %s', $e->getMessage()), 0, $e);
        }
    }

    /**
     * Deletes a document from the Xapian index using its internal document id.
     *
     * @throws RuntimeException When the Xapian extension is missing or Xapian fails during deletion
     */
    public function deleteDocument(int $docId): void
    {
        $this->assertXapianIsLoaded();

        try {
            error_log(
                '[Xapian] XapianIndexService::deleteDocument: deleting docId='
                .var_export($docId, true)
            );

            // Opened inside the try block so that open failures are wrapped as well.
            $db = $this->openWritableDatabase();
            $db->delete_document($docId);
            $db->flush();
        } catch (Throwable $e) {
            throw new RuntimeException(\sprintf('Failed to delete document in Xapian: %s', $e->getMessage()), 0, $e);
        }
    }

    /**
     * Removes from the Xapian index every document referenced by a course.
     *
     * The document ids are read from SearchEngineRef::searchDid for resource
     * nodes linked to the given course. Xapian failures are logged per
     * document and do not interrupt the purge.
     */
    public function purgeCourseIndex(int $courseId): void
    {
        // Get all Xapian document ids (search_did) linked to this course
        $rows = $this->em->createQueryBuilder()
            ->select('DISTINCT ser.searchDid AS searchDid')
            ->from(SearchEngineRef::class, 'ser')
            ->join('ser.resourceNode', 'rn')
            ->join('rn.resourceLinks', 'rl')
            ->join('rl.course', 'c')
            ->where('c.id = :courseId')
            ->setParameter('courseId', $courseId)
            ->getQuery()
            ->getScalarResult()
        ;

        foreach ($rows as $row) {
            // Non-positive ids are ignored by deleteBySearchDid().
            $this->deleteBySearchDid((int) ($row['searchDid'] ?? 0));
        }
    }

    /**
     * Guards every Xapian operation against a missing PHP extension.
     *
     * @throws RuntimeException When the Xapian classes are not available
     */
    private function assertXapianIsLoaded(): void
    {
        if (!class_exists(XapianWritableDatabase::class)) {
            throw new RuntimeException('Xapian PHP extension is not loaded.');
        }
    }

    /**
     * Opens the writable Xapian database using DB_CREATE_OR_OPEN.
     */
    private function openWritableDatabase(): XapianWritableDatabase
    {
        $indexDir = $this->indexPathResolver->getIndexDir();

        return new XapianWritableDatabase($indexDir, Xapian::DB_CREATE_OR_OPEN);
    }

    /**
     * Builds the stemmer for the requested language, falling back to the
     * default language when Xapian rejects the resolved stemmer name.
     */
    private function createStemmer(?string $language): XapianStem
    {
        // normalize ISO/code into a Xapian stemmer language string
        $xapianLanguage = $this->mapLanguageToXapianStemmer($language);

        try {
            return new XapianStem($xapianLanguage);
        } catch (Throwable $e) {
            error_log(
                '[Xapian] indexDocument: failed to init stemmer for lang='
                .var_export($xapianLanguage, true)
                .', fallback=english, error='.$e->getMessage()
            );

            return new XapianStem(self::DEFAULT_LANGUAGE);
        }
    }

    /**
     * Feeds every usable field value to the term generator as unprefixed text.
     *
     * @param array<string,mixed> $fields
     */
    private function indexFreeText(XapianTermGenerator $termGen, array $fields): void
    {
        foreach ($fields as $value) {
            $text = $this->normalizeText($value);
            if (null !== $text) {
                $termGen->index_text($text, 1);
            }
        }
    }

    /**
     * Indexes fielded values under the prefix "F" + upper-cased field code.
     *
     * @param array<string,mixed> $fieldValuesByCode
     */
    private function indexPrefixedFields(XapianTermGenerator $termGen, array $fieldValuesByCode): void
    {
        foreach ($fieldValuesByCode as $code => $value) {
            $code = trim((string) $code);
            $text = $this->normalizeText($value);
            if ('' === $code || null === $text) {
                continue;
            }

            // Must match query parser convention: F + strtoupper(code).
            // This is what makes t: / d: / k: work.
            $termGen->index_text($text, 1, 'F'.strtoupper($code));
        }
    }

    /**
     * Adds the caller-supplied raw terms to the document, skipping empty or
     * non-stringable entries. Terms are added verbatim (no trimming).
     *
     * @param array<int|string,mixed> $terms
     */
    private function addExtraTerms(XapianDocument $doc, array $terms): void
    {
        foreach ($terms as $term) {
            $term = $this->stringify($term);
            if (null === $term || '' === $term) {
                continue;
            }
            $doc->add_term($term, 1);
        }
    }

    /**
     * Converts a value to trimmed text suitable for indexing.
     *
     * @return string|null The trimmed text, or null when there is nothing to index
     */
    private function normalizeText(mixed $value): ?string
    {
        $text = $this->stringify($value);
        if (null === $text) {
            return null;
        }

        $text = trim($text);

        return '' === $text ? null : $text;
    }

    /**
     * Casts scalars and Stringable objects to string.
     *
     * @return string|null Null for null and for values that have no string form
     */
    private function stringify(mixed $value): ?string
    {
        if (\is_string($value)) {
            return $value;
        }

        if (\is_scalar($value) || $value instanceof \Stringable) {
            return (string) $value;
        }

        // null, arrays, resources and plain objects have no meaningful text.
        return null;
    }

    /**
     * Resolves a language hint into a Xapian stemmer language name.
     *
     * Accepts a stemmer name listed in STEMMER_BY_ISO_CODE ("spanish"), a
     * language code ("es") or a locale variant ("es_ES", "pt-BR"). Anything
     * else, including null and blank strings, resolves to the default language.
     */
    private function mapLanguageToXapianStemmer(?string $language): string
    {
        $raw = strtolower(trim($language ?? ''));
        if ('' === $raw) {
            return self::DEFAULT_LANGUAGE;
        }

        // If caller already provides a Xapian language name, accept it
        if (\in_array($raw, self::STEMMER_BY_ISO_CODE, true)) {
            return $raw;
        }

        // Normalize ISO variants: es_ES, pt-BR, en_US -> es, pt, en
        $iso = trim(substr($raw, 0, strcspn($raw, '_-')));

        return self::STEMMER_BY_ISO_CODE[$iso] ?? self::DEFAULT_LANGUAGE;
    }

    /**
     * Best-effort deletion of one index entry: failures are logged, never thrown.
     */
    private function deleteBySearchDid(int $did): void
    {
        if ($did <= 0) {
            return;
        }

        try {
            // search_did == Xapian internal docid
            $this->deleteDocument($did);
        } catch (Throwable $e) {
            error_log(
                '[Xapian] deleteBySearchDid: delete failed for search_did='.$did.': '.
                $e->getMessage().' in '.$e->getFile().':'.$e->getLine()
            );
        }
    }
}
302