IndexCreator   A
last analyzed

Complexity

Total Complexity 25

Size/Duplication

Total Lines 188
Duplicated Lines 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
eloc 98
c 4
b 0
f 0
dl 0
loc 188
rs 10
wmc 25

2 Methods

Rating   Name   Duplication   Size   Complexity  
A getMorphology() 0 30 6
F createIndex() 0 141 19
1
<?php declare(strict_types = 1);
2
3
/**
4
 * Created by PhpStorm.
5
 * User: gordon
6
 * Date: 24/3/2561
7
 * Time: 21:14 น.
8
 */
9
10
namespace Suilven\ManticoreSearch\Service;
11
12
use Suilven\FreeTextSearch\Exception\UnsupportedException;
13
use Suilven\FreeTextSearch\Helper\IndexingHelper;
14
use Suilven\FreeTextSearch\Helper\SpecsHelper;
15
use Suilven\FreeTextSearch\Indexes;
16
use Suilven\FreeTextSearch\Types\FieldTypes;
17
use Suilven\FreeTextSearch\Types\LanguageTypes;
18
use Suilven\FreeTextSearch\Types\TokenizerTypes;
19
20
// @phpcs:disable Generic.Files.LineLength.TooLong
21
// @phpcs:disable SlevomatCodingStandard.Files.LineLength.LineTooLong
22
class IndexCreator extends \Suilven\FreeTextSearch\Base\IndexCreator implements \Suilven\FreeTextSearch\Interfaces\IndexCreator
{
    /**
     * Create the Manticore index for a configured free text search index.
     *
     * Columns are built from the index's fields, then its has-many and has-one relations. Any existing index of
     * the same name is dropped before the new one is created.
     *
     * @todo Refactor into Indexer base
     * @param string $indexName the name of the index
     * @throws \Suilven\FreeTextSearch\Exception\UnsupportedException if a tokenizer is configured and the index language is not English
     */
    public function createIndex(string $indexName): void
    {
        $columns = $this->buildFieldColumns($indexName);

        $indexes = new Indexes();
        $index = $indexes->getIndex($indexName);
        $mvaFields = $index->getHasManyFields();
        $hasOneFields = $index->getHasOneFields();

        // relation columns are added after the plain fields, so they replace a field column of the same name
        foreach (\array_keys($mvaFields) as $mvaColumnName) {
            $columns[$mvaColumnName] = ['type' => 'multi'];
        }

        foreach (\array_keys($hasOneFields) as $hasOneColumnName) {
            $columns[$hasOneColumnName] = ['type' => 'bigint'];
        }

        $client = new Client();
        $manticoreClient = $client->getConnection();

        $settings = [
            'rt_mem_limit' => '256M',
            'dict' => 'keywords',
            'min_infix_len' => 2,
            'html_strip' => 1,
            'bigram_index' => 'all',
            'stopwords' => 'en',
        ];

        // @todo this may need refactored
        $tokenizer = $index->getTokenizer();
        if ($tokenizer !== TokenizerTypes::NONE) {
            if ($tokenizer === TokenizerTypes::LEMMATIZER) {
                // @todo make the lemmatizer dictionary location configurable
                $settings['lemmatizer_base'] = '/usr/local/share';
            }

            // hand over the TokenizerTypes value itself: getMorphology() switches on those constants, so it must
            // not be translated into a Manticore specific name beforehand
            $settings['morphology'] = $this->getMorphology($tokenizer, $index->getLanguage());
        }

        // drop any existing index first, as updating an existing one does not effect change.  The silent flag is
        // passed so that the drop is still attempted quietly when there is nothing to drop
        $manticoreClient->indices()->drop(['index' => $indexName, 'body' => ['silent' => true]]);
        $manticoreIndex = new \Manticoresearch\Index($manticoreClient, $indexName);

        $manticoreIndex->create(
            $columns,
            $settings,
            true
        );
    }


    /**
     * Build the Manticore column definitions for the plain (non relation) fields of an index.
     *
     * A field defaults to type 'text'.  When a field spec exists for it, the spec decides the column type, and a
     * field that remains 'text' is both indexed and stored.  'Link' and any configured stored field are stored only.
     *
     * @param string $indexName the name of the index
     * @return array<string, array<string, mixed>> column definitions keyed by field name, each holding 'type' and 'options'
     */
    private function buildFieldColumns(string $indexName): array
    {
        $indexingHelper = new IndexingHelper();
        $fields = $indexingHelper->getFields($indexName);
        $storedFields = $this->getStoredFields($indexName);

        $specsHelper = new SpecsHelper();
        $specs = $specsHelper->getFieldSpecs($indexName);

        $columns = [];
        foreach ($fields as $field) {
            // this will be the most common
            $indexType = 'text';
            $options = [];

            if (isset($specs[$field])) {
                // @todo configure index to strip HTML
                switch ($specs[$field]) {
                    case FieldTypes::FOREIGN_KEY:
                        // @todo this perhaps needs to be a token
                        // See https://docs.manticoresearch.com/3.4.0/html/indexing/data_types.html
                        // @todo also how to mark strings for tokenizing?
                        $indexType = 'bigint';

                        break;
                    case FieldTypes::INTEGER:
                        $indexType = 'integer';

                        break;
                    case FieldTypes::FLOAT:
                        $indexType = 'float';

                        break;
                    case FieldTypes::TIME:
                        $indexType = 'timestamp';

                        break;
                    case FieldTypes::BOOLEAN:
                        // @todo is there a better type?
                        $indexType = 'integer';

                        break;
                }

                if ($indexType === 'text') {
                    $options = ['indexed', 'stored'];
                }
            }

            // override for Link, do not index it.  The storing of the Link URL is to save on database hierarchy
            // traversal when rendering search results
            if ($field === 'Link') {
                $indexType = 'text';
                $options = ['stored'];
            }

            // stored fields keep the column type chosen above, only the options are replaced
            if (\in_array($field, $storedFields, true)) {
                $options = ['stored'];
            }

            $columns[$field] = ['type' => $indexType, 'options' => $options];
        }

        return $columns;
    }


    /**
     * Map a tokenizer type to the value of the Manticore 'morphology' index setting.
     *
     * Tokenizers without a mapping here (SNOWBALL for example) fall back to TokenizerTypes::NONE.
     *
     * @TODO Increase range of languages
     * @param string|null $tokenizer one of the TokenizerTypes constants, or null
     * @param string $language the language of the index, compared against LanguageTypes::ENGLISH
     * @return string the morphology to use at the Manticore config level
     * @throws \Suilven\FreeTextSearch\Exception\UnsupportedException if the combination of tokenizer and language cannot be used
     */
    private function getMorphology(?string $tokenizer, string $language): string
    {
        // @TODO add other languages, this is to get things up and rolling
        if ($language !== LanguageTypes::ENGLISH) {
            throw new UnsupportedException('Only English is supported for now #WorkInProgress');
        }

        switch ($tokenizer) {
            case TokenizerTypes::PORTER:
                return 'stem_en';
            case TokenizerTypes::LEMMATIZER:
                // @todo make the _all configurable
                return 'lemmatize_en_all';
            case TokenizerTypes::SOUNDEX:
                return 'soundex';
            case TokenizerTypes::METAPHONE:
                return 'metaphone';
            default:
                return TokenizerTypes::NONE;
        }
    }
}
212