1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* Tests the index configuration generated by EnglishIndexSettings |
5
|
|
|
* @package elastica |
6
|
|
|
*/ |
7
|
|
|
class IndexSettingsTest extends ElasticsearchBaseTest {

    /**
     * Prepare the test classes before the parent set-up runs.
     *
     * The list of classes assigned to requireDefaultRecordsFrom needs to be
     * in place in order to create the list of searchable classes and fields
     * that are available; this simulates part of a build.
     */
    public function setUp() {
        $this->requireDefaultRecordsFrom = array(
            'SearchableTestPage',
            'SiteTree',
            'Page',
            'FlickrPhotoTO',
            'FlickrSetTO',
            'FlickrTagTO',
            'FlickrAuthorTO',
        );

        // add Searchable extension where appropriate
        $searchableClasses = array(
            'FlickrSetTO',
            'FlickrPhotoTO',
            'FlickrTagTO',
            'FlickrAuthorTO',
            'SearchableTestPage',
        );
        foreach ($searchableClasses as $className) {
            $className::add_extension('SilverStripe\Elastica\Searchable');
        }

        // load fixtures
        parent::setUp();
    }

    /**
     * Check the filters and analyzers produced by EnglishIndexSettings.
     *
     * Compare with structure as per
     * https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html#english-analyzer
     *
     * PUT /my_index
     * {
     *   "settings": {
     *     "analysis": {
     *       "char_filter":
     *       "tokenizer":
     *       "filter":
     *       "analyzer":
     *     }
     *   }
     * }
     */
    public function testEnglishIndexSettings() {
        $indexSettings = new EnglishIndexSettings();
        $generated = $indexSettings->generateConfig();
        $config = $generated['index'];

        // Check filters
        $filters = $config['analysis']['filter'];

        $stopwordFilter = $filters['english_stop'];
        $this->assertEquals('stop', $stopwordFilter['type']);
        $this->assertEquals(
            $indexSettings->getStopWords(),
            $stopwordFilter['stopwords']
        );
        $this->assertFalse(isset($filters['stopword_filter']));

        $this->assertEquals(
            array('type' => 'stemmer', 'language' => 'english'),
            $filters['english_stemmer']
        );

        $this->assertEquals(
            array('type' => 'stemmer', 'language' => 'possessive_english'),
            $filters['english_possessive_stemmer']
        );

        $this->assertEquals(
            array('type' => 'snowball', 'language' => 'English'),
            $filters['english_snowball']
        );

        $this->assertEquals(
            array('type' => 'length', 'min' => '2'),
            $filters['no_single_chars']
        );

        $expected = array(
            'type' => 'nGram',
            'min_gram' => 2,
            'max_gram' => 20,
            'token_chars' => array('letter', 'digit', 'punctuation', 'symbol')
        );
        $this->assertEquals($expected, $filters['autocomplete']);

        $expected = array(
            'type' => 'shingle',
            'min_shingle_size' => '2',
            'max_shingle_size' => '2',
            'output_unigrams' => false
        );
        $this->assertEquals($expected, $filters['filter_shingle']);

        // check for existence and then actual values of analyzer
        $analyzers = $config['analysis']['analyzer'];
        $stemmedAnalyzer = $analyzers['stemmed'];

        // Previously the tokenizer and filter list were read but never
        // asserted. Only the presence of the tokenizer is checked, since the
        // test never recorded which tokenizer the stemmed analyzer should use.
        $this->assertArrayHasKey('tokenizer', $stemmedAnalyzer);
        $expected = array('no_single_chars', 'english_snowball', 'lowercase', 'english_stop');
        $this->assertEquals($expected, $stemmedAnalyzer['filter']);

        // check the unstemmed analyzer
        $unstemmedAnalyzer = $analyzers['unstemmed'];
        $this->assertEquals('custom', $unstemmedAnalyzer['type']);
        $this->assertEquals('uax_url_email', $unstemmedAnalyzer['tokenizer']);

        //Difference here is deliberate lack of a stemmer
        $expected = array('no_single_chars', 'lowercase', 'english_stop');
        $this->assertEquals($expected, $unstemmedAnalyzer['filter']);

        // Check autocomplete index analyzer
        $expected = array(
            'type' => 'custom',
            'tokenizer' => 'whitespace',
            'filter' => array('lowercase', 'asciifolding', 'autocomplete')
        );
        $this->assertEquals($expected, $analyzers['autocomplete_index_analyzer']);

        // Check autocomplete search analyzer
        $expected = array(
            'type' => 'custom',
            'tokenizer' => 'whitespace',
            'filter' => array('lowercase', 'asciifolding')
        );
        $this->assertEquals($expected, $analyzers['autocomplete_search_analyzer']);

        // Check shingles analyzer
        $expected = array(
            'type' => 'custom',
            'tokenizer' => 'uax_url_email',
            'filter' => array('lowercase', 'filter_shingle')
        );
        $this->assertEquals($expected, $analyzers['shingles']);
    }

    /**
     * The ASCII folding flag should round-trip through its setter and getter
     * for both false and true.
     */
    public function testGetSetAsciiFolding() {
        $indexSettings = new EnglishIndexSettings();

        $indexSettings->setAsciiFolding(false);
        $this->assertFalse($indexSettings->getAsciiFolding());

        $indexSettings->setAsciiFolding(true);
        $this->assertTrue($indexSettings->getAsciiFolding());
    }

}
162
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.