Passed
Pull Request — master (#7159)
by
unknown
09:00
created

DocumentXapianIndexer   A

Complexity

Total Complexity 41

Size/Duplication

Total Lines 297
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 144
c 1
b 0
f 0
dl 0
loc 297
rs 9.1199
wmc 41

5 Methods

Rating   Name   Duplication   Size   Complexity  
B applyPrefilterConfigToTerms() 0 49 11
A deleteForResourceNodeId() 0 39 4
A __construct() 0 5 1
F indexDocument() 0 136 14
B resolveCourseSessionAndRootNode() 0 31 11

How to fix   Complexity   

Complex Class

Complex classes like DocumentXapianIndexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use DocumentXapianIndexer, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Chamilo\CoreBundle\Search\Xapian;
6
7
use Chamilo\CoreBundle\Entity\ResourceLink;
8
use Chamilo\CoreBundle\Entity\ResourceNode;
9
use Chamilo\CoreBundle\Entity\SearchEngineRef;
10
use Chamilo\CoreBundle\Settings\SettingsManager;
11
use Chamilo\CourseBundle\Entity\CDocument;
12
use Doctrine\ORM\EntityManagerInterface;
13
14
/**
 * Handles Xapian indexing for CDocument entities.
 *
 * Besides pushing documents to the Xapian index through XapianIndexService,
 * this class maintains the SearchEngineRef rows that map a resource node id
 * to the Xapian document id it was indexed under.
 */
final class DocumentXapianIndexer
{
    public function __construct(
        private readonly XapianIndexService $xapianIndexService,
        private readonly EntityManagerInterface $em,
        private readonly SettingsManager $settingsManager,
    ) {
    }

    /**
     * Index a CDocument into Xapian.
     *
     * Indexing is skipped (null is returned) when search is disabled, when the
     * document has no ResourceNode, when the document is a folder, or when the
     * Xapian indexing call throws.
     *
     * When the document was indexed before, the new Xapian document is created
     * first and the previous one is removed only afterwards, so a failed
     * re-index leaves the previous document and its mapping untouched.
     *
     * @return int|null Xapian document id or null when indexing is skipped
     */
    public function indexDocument(CDocument $document): ?int
    {
        $resourceNode = $document->getResourceNode();

        error_log('[Xapian] indexDocument: start for iid='.(string) $document->getIid()
            .', resource_node_id='.($resourceNode ? $resourceNode->getId() : 'null')
            .', filetype='.$document->getFiletype()
        );

        // 1) Check if search is globally enabled
        $enabled = (string) $this->settingsManager->getSetting('search.search_enabled', true);
        error_log('[Xapian] indexDocument: search.search_enabled='.var_export($enabled, true));

        if ($enabled !== 'true') {
            error_log('[Xapian] indexDocument: search is disabled, skipping indexing');

            return null;
        }

        if (!$resourceNode instanceof ResourceNode) {
            error_log('[Xapian] indexDocument: missing ResourceNode, skipping');

            return null;
        }

        // Do not index folders
        if ($document->getFiletype() === 'folder') {
            error_log('[Xapian] indexDocument: skipping folder document, resource_node_id='
                .$resourceNode->getId()
            );

            return null;
        }

        // 2) Resolve course, session and course root node ids
        [$courseId, $sessionId, $courseRootNodeId] = $this->resolveCourseSessionAndRootNode($resourceNode);

        error_log('[Xapian] indexDocument: courseId='.var_export($courseId, true)
            .', sessionId='.var_export($sessionId, true)
            .', courseRootNodeId='.var_export($courseRootNodeId, true)
        );

        // 3) Get textual content if any
        $content = (string) ($resourceNode->getContent() ?? '');
        error_log('[Xapian] indexDocument: content_length='.strlen($content));

        // 4) Build fields payload (every value is sent as a string)
        $fields = [
            'title' => (string) $document->getTitle(),
            'description' => (string) ($document->getComment() ?? ''),
            'content' => $content,
            'filetype' => (string) $document->getFiletype(),
            'resource_node_id' => (string) $resourceNode->getId(),
            'course_id' => $courseId !== null ? (string) $courseId : '',
            'session_id' => $sessionId !== null ? (string) $sessionId : '',
            'course_root_node_id' => $courseRootNodeId !== null ? (string) $courseRootNodeId : '',
            'full_path' => (string) $document->getFullPath(),
        ];

        // 5) + 6) Base terms plus extra prefilter terms from config
        $terms = $this->buildTerms($document, $courseId, $sessionId);

        error_log('[Xapian] indexDocument: terms='.json_encode($terms));

        // 7) Existing mapping?
        /** @var SearchEngineRef|null $existingRef */
        $existingRef = $this->em
            ->getRepository(SearchEngineRef::class)
            ->findOneBy(['resourceNodeId' => $resourceNode->getId()]);

        $existingDocId = $existingRef?->getSearchDid();
        error_log('[Xapian] indexDocument: existing SearchEngineRef id='
            .($existingRef?->getId() ?? 'null')
            .', existing_did='.var_export($existingDocId, true)
        );

        // 8) Call Xapian (create new document). This happens BEFORE the old
        //    document is removed: if it fails, the previous document and the
        //    mapping pointing at it both stay valid.
        try {
            $docId = $this->xapianIndexService->indexDocument(
                $fields,
                $terms
            );
        } catch (\Throwable $e) {
            error_log('[Xapian] indexDocument: indexDocument() failed: '.$e->getMessage());

            return null;
        }

        error_log('[Xapian] indexDocument: XapianIndexService->indexDocument returned docId='
            .var_export($docId, true)
        );

        // 8.1) Best-effort removal of the superseded Xapian document. Skipped
        //      when the service handed back the same id, so the freshly
        //      indexed document is never deleted by accident.
        if ($existingDocId !== null && (int) $existingDocId !== (int) $docId) {
            try {
                $this->xapianIndexService->deleteDocument($existingDocId);
                error_log('[Xapian] indexDocument: previous docId deleted='
                    .var_export($existingDocId, true)
                );
            } catch (\Throwable $e) {
                // A failed cleanup must not abort indexing; it is only logged.
                error_log('[Xapian] indexDocument: failed to delete previous docId='
                    .var_export($existingDocId, true)
                    .' error='.$e->getMessage()
                );
            }
        }

        // 9) Persist mapping resource_node_id <-> search_did
        $this->saveMapping($existingRef, $resourceNode, $docId);

        return $docId;
    }

    /**
     * Remove a document from Xapian using the resource node id.
     *
     * Does nothing when no SearchEngineRef exists for the node. The Xapian
     * deletion is best effort: when it throws, the error is logged and the
     * SearchEngineRef row is removed anyway.
     */
    public function deleteForResourceNodeId(int $resourceNodeId): void
    {
        error_log('[Xapian] deleteForResourceNodeId: start, resource_node_id='.$resourceNodeId);

        /** @var SearchEngineRef|null $ref */
        $ref = $this->em
            ->getRepository(SearchEngineRef::class)
            ->findOneBy(['resourceNodeId' => $resourceNodeId]);

        if (!$ref instanceof SearchEngineRef) {
            error_log('[Xapian] deleteForResourceNodeId: no SearchEngineRef found, nothing to delete');

            return;
        }

        $docId = $ref->getSearchDid();
        error_log('[Xapian] deleteForResourceNodeId: found SearchEngineRef id='.$ref->getId()
            .', search_did='.var_export($docId, true)
        );

        if ($docId !== null) {
            try {
                $this->xapianIndexService->deleteDocument($docId);
                error_log('[Xapian] deleteForResourceNodeId: deleteDocument called for did='
                    .var_export($docId, true)
                );
            } catch (\Throwable $e) {
                error_log('[Xapian] deleteForResourceNodeId: deleteDocument failed for did='
                    .var_export($docId, true)
                    .' error='.$e->getMessage()
                );
            }
        }

        $this->em->remove($ref);
        $this->em->flush();

        error_log('[Xapian] deleteForResourceNodeId: SearchEngineRef removed for resource_node_id='
            .$resourceNodeId
        );
    }

    /**
     * Build the Xapian terms for a document: the fixed "Tdocument" type term,
     * the course/session terms, and any extra terms derived from the
     * search.search_prefilter_prefix setting.
     *
     * @return list<string>
     */
    private function buildTerms(CDocument $document, ?int $courseId, ?int $sessionId): array
    {
        $terms = ['Tdocument'];

        if ($courseId !== null) {
            $terms[] = 'C'.$courseId;
        }
        if ($sessionId !== null) {
            $terms[] = 'S'.$sessionId;
        }

        $this->applyPrefilterConfigToTerms($terms, $courseId, $sessionId, $document);

        return $terms;
    }

    /**
     * Create or update the SearchEngineRef row linking the resource node to
     * the given Xapian document id, then flush the entity manager.
     */
    private function saveMapping(?SearchEngineRef $ref, ResourceNode $resourceNode, ?int $docId): void
    {
        if ($ref instanceof SearchEngineRef) {
            $ref->setSearchDid($docId);
            error_log('[Xapian] indexDocument: updating existing SearchEngineRef id='.$ref->getId());
        } else {
            $ref = new SearchEngineRef();
            $ref->setResourceNodeId((int) $resourceNode->getId());
            $ref->setSearchDid($docId);
            $this->em->persist($ref);
            error_log('[Xapian] indexDocument: creating new SearchEngineRef for resource_node_id='
                .$resourceNode->getId()
            );
        }

        $this->em->flush();

        error_log('[Xapian] indexDocument: SearchEngineRef saved with id='.$ref->getId());
    }

    /**
     * Resolve course id, session id and course root node id from resource links.
     *
     * The course (and its root node) comes from the first link that has a
     * course; the session comes from the first link that has a session. These
     * may be different links. Any value that cannot be resolved stays null.
     *
     * @return array{0: int|null, 1: int|null, 2: int|null}
     */
    private function resolveCourseSessionAndRootNode(ResourceNode $resourceNode): array
    {
        $courseId = null;
        $sessionId = null;
        $courseRootNodeId = null;

        foreach ($resourceNode->getResourceLinks() as $link) {
            if (!$link instanceof ResourceLink) {
                continue;
            }

            if ($courseId === null) {
                $course = $link->getCourse();
                if ($course) {
                    $courseId = $course->getId();

                    $courseRootNode = $course->getResourceNode();
                    if ($courseRootNode instanceof ResourceNode) {
                        $courseRootNodeId = $courseRootNode->getId();
                    }
                }
            }

            if ($sessionId === null) {
                $session = $link->getSession();
                if ($session) {
                    $sessionId = $session->getId();
                }
            }

            // Everything resolved: no need to look at the remaining links.
            if ($courseId !== null && $sessionId !== null && $courseRootNodeId !== null) {
                break;
            }
        }

        return [$courseId, $sessionId, $courseRootNodeId];
    }

    /**
     * Apply configured prefilter prefixes to Xapian terms.
     *
     * Expected JSON structure in search.search_prefilter_prefix, for example:
     *
     * {
     *   "course":  { "prefix": "C", "title": "Course" },
     *   "session": { "prefix": "S", "title": "Session" },
     *   "filetype": { "prefix": "F", "title": "File type" }
     * }
     *
     * "title" is meant for UI labels, "prefix" is used here for terms. When
     * "prefix" is missing, empty or not a scalar, the uppercased key is used
     * instead. Unknown keys, non-array entries and invalid JSON are ignored.
     *
     * A term that is already present in $terms is not appended again, so the
     * example above does not duplicate the base "C<id>" / "S<id>" terms.
     *
     * @param array<int, string> $terms Term list, extended in place
     */
    private function applyPrefilterConfigToTerms(
        array &$terms,
        ?int $courseId,
        ?int $sessionId,
        CDocument $document
    ): void {
        $raw = (string) $this->settingsManager->getSetting('search.search_prefilter_prefix', true);
        if ($raw === '') {
            return;
        }

        $config = json_decode($raw, true);
        if (!\is_array($config)) {
            return;
        }

        foreach ($config as $key => $item) {
            if (!\is_array($item)) {
                continue;
            }

            // Value to append after the prefix; null means "nothing to add".
            $value = match ((string) $key) {
                'course' => $courseId !== null ? (string) $courseId : null,
                'session' => $sessionId !== null ? (string) $sessionId : null,
                'filetype' => (string) $document->getFiletype(),
                default => null, // Unknown key: ignore for now
            };

            if ($value === null) {
                continue;
            }

            $prefix = $item['prefix'] ?? '';
            $prefix = \is_scalar($prefix) ? (string) $prefix : '';
            if ($prefix === '') {
                $prefix = strtoupper((string) $key);
            }

            $term = $prefix.$value;
            if (!\in_array($term, $terms, true)) {
                $terms[] = $term;
            }
        }
    }
}
318