Passed
Push — master ( 904963...c192c4 )
by
unknown
14:57 queued 05:56
created

XapianIndexService::indexDocument()   F

Complexity

Conditions 15
Paths 289

Size

Total Lines 93
Code Lines 50

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 15
eloc 50
nc 289
nop 4
dl 0
loc 93
rs 3.9708
c 1
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

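Commonly applied refactorings include Extract Method: move a cohesive group of statements into a new, well-named method and call it from the original one.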

1
<?php
2
3
declare(strict_types=1);
4
5
/* For licensing terms, see /license.txt */
6
7
namespace Chamilo\CoreBundle\Search\Xapian;
8
9
use DateTimeImmutable;
10
use RuntimeException;
11
use Throwable;
12
use Xapian;
13
use XapianDocument;
14
use XapianStem;
15
use XapianTermGenerator;
16
use XapianWritableDatabase;
17
18
use const DATE_ATOM;
19
20
/**
 * Service responsible for indexing documents into the Xapian database.
 *
 * Every write operation opens the index directory returned by the
 * SearchIndexPathResolver with DB_CREATE_OR_OPEN, performs a single change
 * and flushes it. Any failure while opening or writing is reported as a
 * RuntimeException that wraps the original error.
 */
final class XapianIndexService
{
    private const DEFAULT_LANGUAGE = 'english';

    /**
     * Stemmer names that are passed through unchanged when the caller already provides them.
     */
    private const KNOWN_STEMMERS = [
        'english', 'spanish', 'french', 'portuguese', 'italian', 'german', 'dutch',
        'swedish', 'norwegian', 'danish', 'finnish', 'russian', 'arabic', 'greek',
        'turkish', 'romanian', 'hungarian', 'indonesian',
    ];

    /**
     * Two-letter ISO language code => stemmer name.
     */
    private const ISO_TO_STEMMER = [
        'en' => 'english',
        'es' => 'spanish',
        'fr' => 'french',
        'pt' => 'portuguese',
        'it' => 'italian',
        'de' => 'german',
        'nl' => 'dutch',
        'sv' => 'swedish',
        'no' => 'norwegian',
        'da' => 'danish',
        'fi' => 'finnish',
        'ru' => 'russian',
        'ar' => 'arabic',
        'el' => 'greek',
        'tr' => 'turkish',
        'ro' => 'romanian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
    ];

    public function __construct(
        private readonly SearchIndexPathResolver $indexPathResolver,
    ) {}

    /**
     * Indexes a simple demo document so we can verify that search works end-to-end.
     *
     * @return int The Xapian internal document id
     *
     * @throws RuntimeException When indexing fails
     */
    public function indexDemoDocument(): int
    {
        $now = new DateTimeImmutable('now');

        $fields = [
            'title' => 'Demo test document',
            'content' => 'This is a test document indexed from XapianIndexService in Chamilo 2.',
            'created_at' => $now->format(DATE_ATOM),
        ];

        $terms = [
            'XTdemo',
            'XTchamilo',
        ];

        return $this->indexDocument($fields, $terms);
    }

    /**
     * Indexes a generic document.
     *
     * Values of $fields are indexed as unprefixed free text; values of
     * $fieldValuesByCode are indexed under the prefix "F" + upper-cased code.
     * Values that cannot be represented as text (null, arrays, resources,
     * objects without __toString) and values that are blank after trimming
     * are skipped. The $fields array (plus, when given, $fieldValuesByCode
     * under the "searchFieldValues" key) is stored serialized as document data.
     *
     * @param array<string,mixed> $fields            Arbitrary data to store and index as free-text
     * @param string[]            $terms             Optional list of additional terms to add to the document
     * @param string|null         $language          Language used for stemming (defaults to english)
     * @param array<string,mixed> $fieldValuesByCode Fielded values, e.g. ['k' => '...', 't' => '...']
     *
     * @return int The Xapian internal document id
     *
     * @throws RuntimeException When the extension is missing or Xapian fails while opening or writing the index
     */
    public function indexDocument(
        array $fields,
        array $terms = [],
        ?string $language = null,
        array $fieldValuesByCode = [] // e.g. ['t'=>'...', 'd'=>'...', 'k'=>'...']
    ): int {
        $this->assertXapianLoaded();

        $doc = new XapianDocument();
        $termGen = new XapianTermGenerator();
        $termGen->set_stemmer($this->createStemmer($language));
        $termGen->set_document($doc);

        // Unprefixed free-text (general search)
        $this->indexFreeText($termGen, $fields);

        // Prefixed dynamic fields: t:, d:, k:, etc.
        if ([] !== $fieldValuesByCode) {
            error_log('[Xapian] indexDocument: fieldValuesByCode='.json_encode(array_keys($fieldValuesByCode)));

            $this->indexPrefixedFields($termGen, $fieldValuesByCode);

            // Optional: keep it in stored data for debugging
            $fields['searchFieldValues'] = $fieldValuesByCode;
        }

        // Extra terms (Tdocument, Cxx, Sxx...)
        $this->addTerms($doc, $terms);

        $doc->set_data(serialize($fields));

        try {
            // Opened inside the try so that open/lock failures are also
            // reported as the documented RuntimeException.
            $db = $this->openWritableDatabase();

            $docId = $db->add_document($doc);
            $db->flush();

            error_log('[Xapian] indexDocument: added docId='.$docId);

            return $docId;
        } catch (Throwable $e) {
            throw new RuntimeException(\sprintf('Failed to index document in Xapian: %s', $e->getMessage()), 0, $e);
        }
    }

    /**
     * Deletes a document from the Xapian index using its internal document id.
     *
     * @throws RuntimeException When the extension is missing or Xapian fails while opening the index or deleting
     */
    public function deleteDocument(int $docId): void
    {
        $this->assertXapianLoaded();

        try {
            // Opened inside the try so that open/lock failures are also
            // reported as the documented RuntimeException.
            $db = $this->openWritableDatabase();

            error_log(
                '[Xapian] XapianIndexService::deleteDocument: deleting docId='
                .var_export($docId, true)
            );

            $db->delete_document($docId);
            $db->flush();
        } catch (Throwable $e) {
            throw new RuntimeException(\sprintf('Failed to delete document in Xapian: %s', $e->getMessage()), 0, $e);
        }
    }

    /**
     * Fails fast when the Xapian classes are not available.
     *
     * @throws RuntimeException When XapianWritableDatabase is not defined
     */
    private function assertXapianLoaded(): void
    {
        if (!class_exists(XapianWritableDatabase::class)) {
            throw new RuntimeException('Xapian PHP extension is not loaded.');
        }
    }

    /**
     * Builds the stemmer for the requested language, falling back to the
     * default language when the mapped stemmer cannot be constructed.
     */
    private function createStemmer(?string $language): XapianStem
    {
        // normalize ISO/code into a Xapian stemmer language string
        $xapianLanguage = $this->mapLanguageToXapianStemmer($language);

        try {
            return new XapianStem($xapianLanguage);
        } catch (Throwable $e) {
            error_log(
                '[Xapian] indexDocument: failed to init stemmer for lang='
                .var_export($xapianLanguage, true)
                .', fallback=english, error='.$e->getMessage()
            );

            return new XapianStem(self::DEFAULT_LANGUAGE);
        }
    }

    /**
     * Feeds every usable field value to the term generator as unprefixed text.
     *
     * @param array<string,mixed> $fields
     */
    private function indexFreeText(XapianTermGenerator $termGen, array $fields): void
    {
        foreach ($fields as $value) {
            $text = $this->normalizeText($value);
            if (null === $text) {
                continue;
            }

            $termGen->index_text($text, 1);
        }
    }

    /**
     * Feeds every usable fielded value to the term generator under its "F<CODE>" prefix.
     *
     * @param array<string,mixed> $fieldValuesByCode
     */
    private function indexPrefixedFields(XapianTermGenerator $termGen, array $fieldValuesByCode): void
    {
        foreach ($fieldValuesByCode as $code => $val) {
            $code = strtolower(trim((string) $code));
            if ('' === $code) {
                continue;
            }

            $text = $this->normalizeText($val);
            if (null === $text) {
                continue;
            }

            // Must match query parser convention: F + strtoupper(code).
            // This is what makes t: / d: / k: work.
            $termGen->index_text($text, 1, 'F'.strtoupper($code));
        }
    }

    /**
     * Adds every non-empty extra term to the document.
     *
     * @param string[] $terms
     */
    private function addTerms(XapianDocument $doc, array $terms): void
    {
        foreach ($terms as $term) {
            $term = (string) $term;
            if ('' === $term) {
                continue;
            }

            $doc->add_term($term, 1);
        }
    }

    /**
     * Converts a raw value into trimmed text suitable for indexing.
     *
     * @return string|null The trimmed text, or null when the value has no
     *                     string representation or is blank after trimming
     */
    private function normalizeText(mixed $value): ?string
    {
        if (is_object($value)) {
            // Casting an object without __toString to string raises an Error.
            if (!method_exists($value, '__toString')) {
                return null;
            }
        } elseif (!is_scalar($value)) {
            // null, arrays and resources: a string cast would index junk
            // such as "Array" (with a warning) rather than real content.
            return null;
        }

        $text = trim((string) $value);

        return '' === $text ? null : $text;
    }

    /**
     * Opens the writable Xapian database using DB_CREATE_OR_OPEN.
     */
    private function openWritableDatabase(): XapianWritableDatabase
    {
        $indexDir = $this->indexPathResolver->getIndexDir();

        return new XapianWritableDatabase($indexDir, Xapian::DB_CREATE_OR_OPEN);
    }

    /**
     * Maps a language identifier to a stemmer name.
     *
     * Accepts either a stemmer name from KNOWN_STEMMERS or an ISO code,
     * optionally with a region/script suffix separated by "_" or "-"
     * (es_ES, pt-BR, en_US). Matching is case-insensitive and ignores
     * surrounding whitespace. Anything unrecognised maps to the default language.
     */
    private function mapLanguageToXapianStemmer(?string $language): string
    {
        if (null === $language) {
            return self::DEFAULT_LANGUAGE;
        }

        $raw = strtolower(trim($language));
        if ('' === $raw) {
            return self::DEFAULT_LANGUAGE;
        }

        // If caller already provides a Xapian language name, accept it
        if (in_array($raw, self::KNOWN_STEMMERS, true)) {
            return $raw;
        }

        // Normalize ISO variants: keep only what precedes the first "_" or "-"
        $iso = trim(substr($raw, 0, strcspn($raw, '_-')));

        return self::ISO_TO_STEMMER[$iso] ?? self::DEFAULT_LANGUAGE;
    }
}
256