Passed
Pull Request — master (#7223)
by
unknown
09:25
created

configureDynamicFieldPrefixes()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 29
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 17
nc 4
nop 1
dl 0
loc 29
rs 9.7
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
/* For licensing terms, see /license.txt */
6
7
namespace Chamilo\CoreBundle\Search\Xapian;
8
9
use Doctrine\DBAL\Connection;
10
use RuntimeException;
11
use Throwable;
12
use XapianDatabase;
13
use XapianDocument;
14
use XapianEnquire;
15
use XapianQuery;
16
use XapianQueryParser;
17
use XapianStem;
18
19
/**
 * High-level Xapian search service for Chamilo 2.
 *
 * Opens the Xapian index whose location is provided by SearchIndexPathResolver,
 * parses the user query (stemming + field prefixes read from the
 * search_engine_field table) and returns the matching documents.
 */
final class XapianSearchService
{
    /**
     * Stemmer language used when no language is supplied, when it cannot be
     * mapped, or when Xapian rejects the mapped language.
     */
    private const DEFAULT_STEMMER = 'english';

    /**
     * ISO 639-1 code => Xapian stemmer language name.
     *
     * The values double as the list of stemmer names accepted verbatim.
     * "nb" and "nn" are the Bokmal/Nynorsk codes for Norwegian.
     */
    private const STEMMER_BY_ISO = [
        'en' => 'english',
        'es' => 'spanish',
        'fr' => 'french',
        'pt' => 'portuguese',
        'it' => 'italian',
        'de' => 'german',
        'nl' => 'dutch',
        'sv' => 'swedish',
        'no' => 'norwegian',
        'nb' => 'norwegian',
        'nn' => 'norwegian',
        'da' => 'danish',
        'fi' => 'finnish',
        'ru' => 'russian',
        'ar' => 'arabic',
        'el' => 'greek',
        'tr' => 'turkish',
        'ro' => 'romanian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
    ];

    /**
     * Query prefix => term prefix, registered when search_engine_field
     * cannot be read.
     */
    private const FALLBACK_PREFIXES = [
        't' => 'FT',
        'd' => 'FD',
        'k' => 'FK',
        'c' => 'FC',
    ];

    public function __construct(
        private readonly SearchIndexPathResolver $indexPathResolver,
        private readonly Connection $conn,
    ) {}

    /**
     * Execute a simple search query against the Xapian index.
     *
     * Supports field queries like:
     *  - t:"some title"
     *  - d:lorem
     *  - k:peru
     *
     * @param string              $queryString Raw user query; a blank string produces an empty-term query
     * @param int                 $offset      Index of the first match to return (negative values are treated as 0)
     * @param int                 $length      Maximum number of matches to return (negative values are treated as 0)
     * @param array<string,mixed> $extra       Optional hints; the first non-blank string among the keys
     *                                         "language", "language_iso" and "locale" selects the stemmer
     * @param int                 $countType   Currently not used by this method; kept for interface compatibility
     *
     * @return array{count:int, results:array<int, array<string, mixed>>}
     *
     * @throws RuntimeException When the Xapian extension is missing or the index cannot be opened
     */
    public function search(
        string $queryString,
        int $offset = 0,
        int $length = 10,
        array $extra = [],
        int $countType = 0,
    ): array {
        if (!class_exists(XapianDatabase::class)) {
            throw new RuntimeException('Xapian PHP extension is not loaded.');
        }

        $indexDir = $this->indexPathResolver->getIndexDir();
        $this->indexPathResolver->ensureIndexDirectoryExists();

        try {
            $db = new XapianDatabase($indexDir);
        } catch (Throwable $e) {
            throw new RuntimeException(
                sprintf('Unable to open Xapian database at "%s": %s', $indexDir, $e->getMessage()),
                0,
                $e
            );
        }

        $enquire = new XapianEnquire($db);
        $enquire->set_query($this->buildQuery($db, $queryString, $extra));

        // Xapian takes unsigned counts here; never hand it a negative window.
        $matches = $enquire->get_mset(max(0, $offset), max(0, $length));

        $results = [];
        for ($m = $matches->begin(); !$m->equals($matches->end()); $m->next()) {
            $document = $m->get_document();
            if (!$document instanceof XapianDocument) {
                continue;
            }

            $rawData = $document->get_data();

            // NOTE(review): unserialize() is only safe because the payload is
            // presumably written by our own indexer - confirm, or restrict it
            // with ['allowed_classes' => false]. Errors are suppressed on
            // purpose: a corrupt payload yields false instead of a warning.
            $data = '' !== $rawData ? @unserialize($rawData) : null;

            $results[] = [
                'doc_id' => $m->get_docid(),
                'score' => $m->get_percent(),
                'data' => $data,
            ];
        }

        return [
            'count' => $matches->get_matches_estimated(),
            'results' => $results,
        ];
    }

    /**
     * Build the Xapian query for a raw query string.
     *
     * A blank query string, or one the parser rejects, yields an empty-term query.
     * NOTE(review): in Xapian an empty-term query is the "match all documents"
     * query - confirm that returning everything for a malformed query is intended.
     *
     * @param array<string,mixed> $extra Same hints as accepted by search()
     */
    private function buildQuery(XapianDatabase $db, string $queryString, array $extra): XapianQuery
    {
        if ('' === trim($queryString)) {
            return new XapianQuery('');
        }

        $queryParser = new XapianQueryParser();
        $queryParser->set_stemmer($this->createStemmer($this->resolveRequestedLanguage($extra)));
        $queryParser->set_database($db);
        $queryParser->set_stemming_strategy(XapianQueryParser::STEM_SOME);

        // Dynamic prefixes (t:, d:, k:, etc)
        $this->configureDynamicFieldPrefixes($queryParser);

        // IMPORTANT: make parsing consistent (phrases, boolean ops, etc)
        $flags = $this->buildQueryParserFlags();

        try {
            return $queryParser->parse_query($queryString, $flags);
        } catch (Throwable $e) {
            // Safe fallback: do not crash search endpoint on malformed queries.
            error_log('[Xapian] XapianSearchService::search: parse_query failed: '.$e->getMessage());

            return new XapianQuery('');
        }
    }

    /**
     * Pick the language hint supplied by the caller, if any.
     *
     * Keys are checked in order: "language", "language_iso", "locale"
     * (e.g. "fr_61", "fr_FR", "fr", "french", "es_PE"). Non-string and blank
     * values are skipped.
     *
     * @param array<string,mixed> $extra
     *
     * @return string|null Trimmed hint, or null when none was provided
     */
    private function resolveRequestedLanguage(array $extra): ?string
    {
        foreach (['language', 'language_iso', 'locale'] as $key) {
            $value = $extra[$key] ?? null;
            if (is_string($value) && '' !== trim($value)) {
                return trim($value);
            }
        }

        return null;
    }

    /**
     * Create the stemmer for a language hint.
     *
     * Falls back to the default stemmer when the Xapian build rejects the
     * mapped language, so an unsupported language never breaks a search.
     */
    private function createStemmer(?string $languageRaw): XapianStem
    {
        try {
            return new XapianStem($this->mapLanguageToXapianStemmer($languageRaw));
        } catch (Throwable) {
            return new XapianStem(self::DEFAULT_STEMMER);
        }
    }

    /**
     * Map ISO codes or language names into Xapian stemmer language.
     * Keeps behavior stable by falling back to english.
     *
     * @param string|null $language Stemmer name ("french"), ISO code ("fr") or
     *                              locale-like variant ("fr_FR", "pt-BR", "fr_61")
     *
     * @return string Xapian stemmer language name
     */
    private function mapLanguageToXapianStemmer(?string $language): string
    {
        $raw = strtolower(trim($language ?? ''));
        if ('' === $raw) {
            return self::DEFAULT_STEMMER;
        }

        // If caller already provides a Xapian language name, accept it.
        if (in_array($raw, self::STEMMER_BY_ISO, true)) {
            return $raw;
        }

        // Normalize ISO variants: es_ES, pt-BR, fr_61, en_US -> es, pt, fr, en
        // (keep everything before the first "_" or "-").
        $iso = trim(substr($raw, 0, strcspn($raw, '_-')));

        return self::STEMMER_BY_ISO[$iso] ?? self::DEFAULT_STEMMER;
    }

    /**
     * Register one query prefix per row of search_engine_field.
     *
     * Each code is lower-cased for the query side ("t:") and mapped to the
     * term prefix 'F' + upper-cased code ("FT"). If the table cannot be read,
     * the failure is logged and a fixed set of prefixes is registered instead.
     *
     * NOTE(review): a readable but empty table registers no prefix at all -
     * confirm this is intended rather than applying the fallback set.
     */
    private function configureDynamicFieldPrefixes(XapianQueryParser $qp): void
    {
        try {
            $rows = $this->conn->fetchAllAssociative('SELECT code FROM search_engine_field');
        } catch (Throwable $e) {
            error_log('[Xapian] XapianSearchService: failed to read search_engine_field: '.$e->getMessage());

            // Safe fallback
            foreach (self::FALLBACK_PREFIXES as $field => $termPrefix) {
                $qp->add_prefix($field, $termPrefix);
            }

            return;
        }

        foreach ($rows as $row) {
            $code = strtolower(trim((string) ($row['code'] ?? '')));
            if ('' === $code) {
                continue;
            }

            // Must match indexing convention: 'F' + strtoupper(code)
            $qp->add_prefix($code, 'F'.strtoupper($code));
        }
    }

    /**
     * Combine the query parser flags that exist in the loaded Xapian binding.
     *
     * Every flag is looked up by name, so a constant missing from the binding
     * is skipped instead of raising an error.
     *
     * @return int Bitmask suitable for XapianQueryParser::parse_query()
     */
    private function buildQueryParserFlags(): int
    {
        $flagNames = [
            'FLAG_DEFAULT',
            'FLAG_PHRASE',
            'FLAG_BOOLEAN',
            'FLAG_LOVEHATE',
            'FLAG_WILDCARD',
            'FLAG_PURE_NOT',
            'FLAG_SPELLING_CORRECTION',
            'FLAG_PARTIAL',
        ];

        $flags = 0;
        foreach ($flagNames as $name) {
            $const = XapianQueryParser::class.'::'.$name;
            if (defined($const)) {
                $flags |= constant($const);
            }
        }

        return $flags;
    }
}
267