Passed
Pull Request — main (#3)
by Paolo
01:09
created

ElasticSearchAdapter::getIndex()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 22
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 13
nc 5
nop 0
dl 0
loc 22
rs 9.5222
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace BEdita\ElasticSearch\Adapter;
5
6
use BEdita\Core\Search\BaseAdapter;
7
use BEdita\ElasticSearch\Model\Document\Search;
8
use BEdita\ElasticSearch\Model\Index\AdapterCompatibleInterface;
9
use Cake\Database\Connection;
10
use Cake\Database\Expression\ComparisonExpression;
11
use Cake\Database\Expression\QueryExpression;
12
use Cake\Database\Schema\TableSchema;
13
use Cake\Datasource\EntityInterface;
14
use Cake\Datasource\FactoryLocator;
15
use Cake\ElasticSearch\Index;
16
use Cake\Log\LogTrait;
17
use Cake\ORM\Locator\LocatorAwareTrait;
18
use Cake\ORM\Query;
19
use Cake\ORM\Table;
20
use Cake\Utility\Security;
21
use Exception;
22
use Psr\Log\LogLevel;
23
use RuntimeException;
24
use UnexpectedValueException;
25
26
/**
 * ElasticSearch adapter for BEdita search.
 *
 * The search text is sent to an ElasticSearch index; the matching document IDs and their
 * relevance scores are then copied into a temporary database table that is joined to the
 * ORM query, so that the query is both filtered by ID and sorted by score.
 */
class ElasticSearchAdapter extends BaseAdapter
{
    use LocatorAwareTrait;
    use LogTrait;

    /**
     * Maximum number of documents requested from ElasticSearch for a single search.
     *
     * @var int
     */
    protected const MAX_RESULTS = 1000;

    /**
     * Index instance. Left uninitialized until {@see self::getIndex()} resolves it.
     *
     * @var \Cake\ElasticSearch\Index&\BEdita\ElasticSearch\Model\Index\AdapterCompatibleInterface
     */
    protected Index&AdapterCompatibleInterface $index;

    /**
     * Get index instance for search index.
     *
     * The index is read from the `index` configuration key (default `BEdita/ElasticSearch.Search`).
     * A string value is resolved through the `ElasticSearch` factory locator; any other value is
     * used as-is. The resolved instance is validated once and then memoized on the adapter.
     *
     * @return \BEdita\ElasticSearch\Model\Index\AdapterCompatibleInterface&\Cake\ElasticSearch\Index
     * @throws \UnexpectedValueException When the resolved index is not an `Index` implementing `AdapterCompatibleInterface`.
     */
    protected function getIndex(): Index&AdapterCompatibleInterface
    {
        // Already resolved and validated by a previous call.
        if (isset($this->index)) {
            return $this->index;
        }

        $index = $this->getConfig('index', 'BEdita/ElasticSearch.Search');
        if (is_string($index)) {
            /** @var \Cake\ElasticSearch\Datasource\IndexLocator $locator */
            $locator = FactoryLocator::get('ElasticSearch');
            $index = $locator->get($index);
        }
        if (!$index instanceof Index || !$index instanceof AdapterCompatibleInterface) {
            throw new UnexpectedValueException(sprintf(
                'Search index must be an instance of %s that implements %s interface, got %s',
                Index::class,
                AdapterCompatibleInterface::class,
                get_debug_type($index),
            ));
        }

        $this->index = $index;

        return $this->index;
    }

    /**
     * @inheritDoc
     */
    public function search(Query $query, string $text, array $options = []): Query
    {
        return $this->buildQuery($query, $text, $options);
    }

    /**
     * {@inheritDoc}
     *
     * @codeCoverageIgnore
     */
    public function indexResource(EntityInterface $entity, string $operation): void
    {
        $this->getIndex()->reindex($entity, $operation);
    }

    /**
     * Run the search on ElasticSearch and collect document IDs with their scores.
     *
     * Uses the index `query` finder, selecting only `_id` and `_score`, and fetches at most
     * `static::MAX_RESULTS` documents. The search text always takes precedence over a `query`
     * key possibly present in `$options`, because the arrays are merged with `+`.
     *
     * @param string $text The search text
     * @param array $options The options, forwarded to the `query` finder
     * @return array<array{id: string, score: float}>
     */
    protected function buildElasticSearchQuery(string $text, array $options): array
    {
        return $this->getIndex()
            ->find('query', ['query' => $text] + $options)
            ->select(['_id', '_score'])
            ->limit(static::MAX_RESULTS)
            ->all()
            ->map(static fn (Search $doc): array => ['id' => $doc->id, 'score' => $doc->score()])
            ->toList();
    }

    /**
     * Build query and return it.
     *
     * When ElasticSearch returns no document, a contradiction is added to the `WHERE` clause so
     * that the query yields no rows. Otherwise the results are stored in a temporary table that is
     * inner-joined on `id`, and the query is sorted by descending score.
     *
     * @param \Cake\ORM\Query $query The query
     * @param string $text The search text
     * @param array $options The options
     * @return \Cake\ORM\Query
     * @throws \RuntimeException When the temporary table cannot be created.
     */
    protected function buildQuery(Query $query, string $text, array $options): Query
    {
        $results = $this->buildElasticSearchQuery($text, $options);
        if ($results === []) {
            // Nothing found. No results should be returned. Add a contradiction to the `WHERE` clause.
            return $query->where(new ComparisonExpression('1', '1', 'integer', '<>'));
        }

        // Prepare temporary table with `id` and `score` from ElasticSearch results.
        $tempTable = $this->createTempTable($query->getConnection());
        $insertQuery = $tempTable->query()->insert(['id', 'score']);
        foreach ($results as $row) {
            $insertQuery = $insertQuery->values($row);
        }
        $insertQuery->execute();

        // Add a join with the temporary table to filter by ID and sort by relevance score.
        return $query
            ->innerJoin(
                $tempTable->getTable(),
                (new QueryExpression())->equalFields(
                    $tempTable->aliasField('id'),
                    $query->getRepository()->aliasField('id'),
                ),
            )
            ->orderDesc($tempTable->aliasField('score'));
    }

    /**
     * Create a temporary table to store search results.
     * The table has a randomly generated name, an `id` primary key and an indexed `score` column
     * used to sort results by relevance.
     * The table is dropped when the connection is closed.
     *
     * @param \Cake\Database\Connection $connection The database connection
     * @return \Cake\ORM\Table Table object bound to the newly created temporary table
     * @throws \RuntimeException When any of the statements creating the table fails.
     */
    protected function createTempTable(Connection $connection): Table
    {
        // Random suffix keeps table names unique across searches run on the same connection.
        $table = sprintf('elasticsearch_%s', Security::randomString(16));
        $schema = (new TableSchema($table))
            ->setTemporary(true)
            ->addColumn('id', [
                'type' => TableSchema::TYPE_INTEGER,
                'length' => 11,
                'unsigned' => true,
                'null' => false,
            ])
            ->addColumn('score', [
                'type' => TableSchema::TYPE_FLOAT,
                'null' => false,
            ])
            ->addConstraint(
                'PRIMARY',
                [
                    'type' => TableSchema::CONSTRAINT_PRIMARY,
                    'columns' => ['id'],
                ]
            )
            ->addIndex(
                sprintf('%s_score_idx', str_replace('_', '', $table)),
                [
                    'type' => TableSchema::INDEX_INDEX,
                    'columns' => ['score'],
                ]
            );

        try {
            // Execute every statement needed to create the table within a single transaction.
            // NOTE(review): this only makes creation atomic on drivers that support
            // transactional DDL; elsewhere the transaction has no practical effect.
            $connection->transactional(static function (Connection $connection) use ($schema): void {
                foreach ($schema->createSql($connection) as $statement) {
                    $connection->execute($statement);
                }
            });
        } catch (Exception $e) {
            $this->log(sprintf('Could not create temporary table for ElasticSearch results: %s', $e), LogLevel::ERROR);

            // Keep the original failure as the previous exception for callers that need the cause.
            throw new RuntimeException('Could not create temporary table for ElasticSearch results', 0, $e);
        }

        return (new Table(compact('connection', 'table', 'schema')))
            ->setPrimaryKey('id')
            ->setDisplayField('score');
    }
}
202