1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SilverStripe\Elastica; |
4
|
|
|
|
5
|
|
|
/**
 * Utility methods to help with searching functions, and also testable without fixtures
 */
class ElasticaUtil {

    /**
     * Marker string for pre highlight - can be any string unlikely to appear in a search
     */
    private static $pre_marker = " |PREZXCVBNM12345678";

    /**
     * Marker string for post highlight - can be any string unlikely to appear in a search
     */
    private static $post_marker = "POSTZXCVBNM12345678| ";

    /**
     * @var boolean true to show printer/CLI output, false to hide
     */
    private static $cli_printer_output = true;


    /**
     * Display a message only when running from the command line and output is enabled
     *
     * @param string $content Text to display when in command line mode
     */
    public static function message($content) {
        // Check the cheap local flag first so Director is only consulted when output is enabled
        if (self::$cli_printer_output === true && \Director::is_cli()) {
            echo "$content\n";
        }
    }


    /**
     * Display a human readable yes or no
     *
     * @param mixed $assertion value evaluated for truthiness
     * @return string 'Yes' when $assertion is truthy, 'No' otherwise
     */
    public static function showBooleanHumanReadable($assertion) {
        return $assertion ? 'Yes' : 'No';
    }


    /**
     * Build a "did you mean" phrase from the first option of the first suggestion entry,
     * restoring the capitalisation the user typed in the original query.
     *
     * Words are matched by position: the n-th suggested word is compared with the n-th word of
     * the original query. A word that differs only by case keeps the user's spelling; a corrected
     * word is capitalised when the word it replaces started with an uppercase letter.
     *
     * @param array $alternativeQuerySuggestions suggestion entries; only index 0 is read, via its
     *              'text' key and its 'options' list (each option having 'text' and 'highlighted')
     * @return array|null array with keys 'suggestedQuery' and 'suggestedQueryHighlighted', or null
     *              when there is no suggestion option to use
     */
    public static function getPhraseSuggestion($alternativeQuerySuggestions) {
        // No suggestion entry, or an entry without options, means there is nothing to suggest
        if (empty($alternativeQuerySuggestions[0]['options'])) {
            return null;
        }

        $suggestion = $alternativeQuerySuggestions[0];
        $originalQuery = $suggestion['text'];

        //Use the first suggested phrase
        $firstOption = $suggestion['options'][0];
        $suggestedPhrase = $firstOption['text'];
        $suggestedPhraseHighlighted = $firstOption['highlighted'];

        $highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
        $preTags = $highlightsCfg['PreTags'];
        $postTags = $highlightsCfg['PostTags'];

        // Split every phrase the same way so that word positions line up
        $originalParts = preg_split('/\s+/', trim($originalQuery), -1, PREG_SPLIT_NO_EMPTY);
        $suggestedParts = preg_split('/\s+/', trim($suggestedPhrase), -1, PREG_SPLIT_NO_EMPTY);

        // Swap the highlight tags for markers that stay glued to their word when splitting on
        // whitespace; the surrounding spaces make tags at either end of the phrase match too
        $marked = ' ' . $suggestedPhraseHighlighted . ' ';
        $marked = str_replace(' ' . $preTags, ' ' . self::$pre_marker, $marked);
        $marked = str_replace($postTags . ' ', self::$post_marker, $marked);
        $highlightedParts = preg_split('/\s+/', trim($marked), -1, PREG_SPLIT_NO_EMPTY);

        $plain = array();
        $highlighted = array();
        foreach ($suggestedParts as $index => $suggestedWord) {
            // Fall back to the suggested word when the phrases differ in word count
            $originalWord = isset($originalParts[$index]) ? $originalParts[$index] : $suggestedWord;
            $highlightedWord = isset($highlightedParts[$index]) ? $highlightedParts[$index] : $suggestedWord;

            //If the terms are identical other than case, e.g. new => New, then simply swap
            if (mb_strtolower($originalWord, 'UTF-8') === $suggestedWord) {
                $plain[] = $originalWord;
                $highlighted[] = $originalWord;
                continue;
            }

            //Need to check capitalisation of terms suggested that are different
            $firstChar = mb_substr($originalWord, 0, 1, 'UTF-8');
            if (mb_strtolower($firstChar, 'UTF-8') !== $firstChar) {
                // The replaced word was capitalised, so capitalise the suggestion as well
                $capitalised = self::capitaliseFirstCharacter($suggestedWord);
                $plain[] = $capitalised;
                $highlighted[] = str_replace($suggestedWord, $capitalised, $highlightedWord);
            } else {
                //No need to capitalise, so add suggested word and suggested highlighted word
                $plain[] = $suggestedWord;
                $highlighted[] = $highlightedWord;
            }
        }

        // Put the configured highlight tags back in place of the markers
        $highlightedPhrase = ' ' . implode(' ', $highlighted) . ' ';
        $highlightedPhrase = str_replace(self::$pre_marker, ' ' . $preTags, $highlightedPhrase);
        $highlightedPhrase = str_replace(self::$post_marker, $postTags . ' ', $highlightedPhrase);

        return array(
            'suggestedQuery' => implode(' ', $plain),
            'suggestedQueryHighlighted' => trim($highlightedPhrase),
        );
    }


    /**
     * Uppercase the first character of a word in a multibyte safe manner
     *
     * @param string $word UTF-8 encoded word
     * @return string the word with its first character converted to uppercase
     */
    private static function capitaliseFirstCharacter($word) {
        $first = mb_substr($word, 0, 1, 'UTF-8');
        $rest = mb_substr($word, 1, mb_strlen($word, 'UTF-8'), 'UTF-8');
        return mb_strtoupper($first, 'UTF-8') . $rest;
    }


    /**
     * The output format of this function is not documented, so at best this is guess work to an
     * extent. Possible formats are:
     * - ((Title.standard:great Content.standard:ammunition Content.standard:could
     *   Content.standard:bair Content.standard:dancing Content.standard:column
     *   Content.standard:company Content.standard:infantry Content.standard:men
     *   Content.standard:soldier Content.standard:brigade Content.standard:zealand
     *   Content.standard:new)~3)
     *   -ConstantScore(_uid:GutenbergBookExtract#1519)
     * - (Description: bay Description: mannerstram)
     *
     * Everything from '-ConstantScore' onwards is ignored. Parsing stops at the first token that
     * contains no colon.
     *
     * @param string $explanation explanation string for more like this terms from Elasticsearch
     * @return array Array of fieldnames mapped to terms
     */
    public static function parseSuggestionExplanation($explanation) {
        $explanation = explode('-ConstantScore', $explanation)[0];

        if (substr($explanation, 0, 1) === '(') {
            // Skip the one or two opening brackets
            $offset = substr($explanation, 0, 2) === '((' ? 2 : 1;
            $bracketPos = strpos($explanation, ')~');

            if ($bracketPos !== false) {
                // Keep only what lies between the opening bracket(s) and ')~'
                $explanation = substr($explanation, $offset, $bracketPos - $offset);
            } else {
                // No ')~' suffix, so drop trailing whitespace and closing bracket(s) instead
                $explanation = rtrim(substr($explanation, $offset), " \t\n\r)");
            }
        }

        $terms = array();

        //Field name(s) => terms
        foreach (explode(' ', $explanation) as $fieldAndTerm) {
            $parts = explode(':', $fieldAndTerm);

            // This is the no terms case
            // NOTE(review): for the "Field: term" format the term is a separate token, so an
            // empty term is recorded and parsing stops here - confirm whether that is intended
            if (count($parts) < 2) {
                break;
            }

            $fieldname = $parts[0];
            $term = $parts[1];

            if (!isset($terms[$fieldname])) {
                $terms[$fieldname] = array();
            }

            $terms[$fieldname][] = $term;
        }

        return $terms;
    }


    /**
     * Add attributes necessary for jQuery to execute autocomplete
     *
     * @param FormField &$queryField field used to type a search query
     * @param mixed $classesToSearch value stored in the data-autocomplete-classes attribute
     * @param mixed $siteTreeOnly value stored in the data-autocomplete-sitetree attribute
     * @param mixed $link value stored in the data-autocomplete-source attribute
     * @param mixed $slug value stored in the data-autocomplete-function attribute
     */
    public static function addAutocompleteToQueryField(&$queryField, $classesToSearch, $siteTreeOnly, $link, $slug) {
        $queryField->setAttribute('data-autocomplete', 'true');
        $queryField->setAttribute('data-autocomplete-field', 'Title');
        $queryField->setAttribute('data-autocomplete-classes', $classesToSearch);
        $queryField->setAttribute('data-autocomplete-sitetree', $siteTreeOnly);
        $queryField->setAttribute('data-autocomplete-source', $link);
        $queryField->setAttribute('data-autocomplete-function', $slug);
    }


    /**
     * @return \Closure callback taking the content to print, either to the web browser or the
     *         command line. Output can optionally be suppressed, see setPrinterOutput()
     */
    public static function getPrinter() {
        return function ($content) {
            if (self::$cli_printer_output === true) {
                // NOTE(review): the "T1"/"T2" prefixes look like leftover debug markers - confirm
                // nothing relies on them before removing
                print(\Director::is_cli() ? "T1 $content\n" : "T2 <p>$content</p>");
            }
        };
    }


    /**
     * Set to true to show output on the command line or browser, false to not
     *
     * @param boolean $new_cli_printer_output true to show output, false to hide it
     */
    public static function setPrinterOutput($new_cli_printer_output) {
        // Normalise to a real boolean so the strict comparisons elsewhere accept truthy values
        self::$cli_printer_output = (bool) $new_cli_printer_output;
    }
}
219
|
|
|
|
When comparing two booleans, it is generally considered safer to use the strict comparison operator.