1 | <?php |
||
8 | class ElasticaUtil { |
||
9 | |||
10 | /** |
||
11 | * Marker string for pre highlight - can be any string unlikely to appear in a search |
||
12 | */ |
||
13 | private static $pre_marker = " |PREZXCVBNM12345678"; |
||
14 | |||
15 | /** |
||
16 | * Marker string for post highlight - can be any string unlikely to appear in a search |
||
17 | */ |
||
18 | private static $post_marker = "POSTZXCVBNM12345678| "; |
||
19 | |||
20 | /** |
||
21 | * @var boolean true to show CLI output, false to hide |
||
22 | */ |
||
23 | private static $cli_printer_output = true; |
||
24 | |||
25 | |||
26 | /** |
||
27 | * Function to display messages only if using the command line |
||
28 | * @param string $content Text to display when in command line mode |
||
29 | */ |
||
30 | 1 | public static function message($content) { |
|
35 | |||
36 | |||
37 | /* |
||
38 | Display a human readable yes or no |
||
39 | */ |
||
40 | public static function showBooleanHumanReadable($assertion) { |
||
43 | |||
44 | |||
/**
 * Turn the first option of a suggester response into a "did you mean" phrase, restoring the
 * capitalisation the user typed where the suggested word allows it.
 *
 * Words are matched up by position: the n-th word of the suggestion is compared with the n-th
 * word of the original query. A suggested word that only differs in case from what was typed
 * is replaced by the typed word; a suggested word that genuinely differs gets its first letter
 * upper-cased when the typed word started with an upper case letter.
 *
 * @param array $alternativeQuerySuggestions suggester output; element 0 is read for its 'text'
 *        (the original query) and 'options' (each with 'text' and 'highlighted') entries
 * @return array|null null when there are no options, otherwise an array with the keys
 *         'suggestedQuery' (plain text) and 'suggestedQueryHighlighted' (with highlight tags)
 */
public static function getPhraseSuggestion($alternativeQuerySuggestions) {
    // Nothing to suggest when the response is empty or carries no options
    if (empty($alternativeQuerySuggestions[0]['options'])) {
        return null;
    }

    $suggestion = $alternativeQuerySuggestions[0];
    $originalQuery = isset($suggestion['text']) ? $suggestion['text'] : '';

    $highlightsCfg = \Config::inst()->get('Elastica', 'Highlights');
    $preTags = $highlightsCfg['PreTags'];
    $postTags = $highlightsCfg['PostTags'];

    // Use the first suggested phrase
    $firstOption = $suggestion['options'][0];
    $suggestedPhrase = $firstOption['text'];
    $suggestedPhraseHighlighted = isset($firstOption['highlighted'])
        ? $firstOption['highlighted']
        : $suggestedPhrase;

    // Split on any run of whitespace so that doubled spaces cannot shift the word positions
    $originalParts = preg_split('/\s+/', $originalQuery, -1, PREG_SPLIT_NO_EMPTY);
    $suggestedParts = preg_split('/\s+/', $suggestedPhrase, -1, PREG_SPLIT_NO_EMPTY);

    // The highlight tags may themselves contain spaces, so swap them for space free markers
    // before splitting the highlighted phrase into words. They are swapped back at the end.
    $marked = ' ' . $suggestedPhraseHighlighted . ' ';
    $marked = str_replace(' ' . $preTags, ' ' . self::$pre_marker, $marked);
    $marked = str_replace($postTags . ' ', self::$post_marker, $marked);
    $highlightedParts = preg_split('/\s+/', $marked, -1, PREG_SPLIT_NO_EMPTY);

    $plain = array();
    $highlighted = array();

    // Walk the words by position rather than through a word keyed map, so that a word which
    // occurs more than once keeps the capitalisation of each individual occurrence
    foreach ($suggestedParts as $position => $suggestedWord) {
        $highlightedWord = isset($highlightedParts[$position])
            ? $highlightedParts[$position]
            : $suggestedWord;

        // The suggestion has more words than the query: nothing to compare with, keep as is
        if (!isset($originalParts[$position])) {
            $plain[] = $suggestedWord;
            $highlighted[] = $highlightedWord;
            continue;
        }

        $originalWord = $originalParts[$position];

        // If the terms are identical other than case, e.g. new => New, keep what was typed
        if (mb_strtolower($originalWord, 'UTF-8') === $suggestedWord) {
            $plain[] = $originalWord;
            $highlighted[] = $originalWord;
            continue;
        }

        // The word was changed by the suggester. Capitalise it if the typed word was
        // capitalised, i.e. its first character changes when lower-cased
        $originalInitial = mb_substr($originalWord, 0, 1, 'UTF-8');
        if (mb_strtolower($originalInitial, 'UTF-8') !== $originalInitial) {
            // Upper-case the suggestion's own first letter (multibyte safe) instead of copying
            // the typed letter, which may be the very typo that was corrected
            $initial = mb_substr($suggestedWord, 0, 1, 'UTF-8');
            $remainder = mb_substr($suggestedWord, 1, mb_strlen($suggestedWord, 'UTF-8'), 'UTF-8');
            $capitalised = mb_strtoupper($initial, 'UTF-8') . $remainder;

            $plain[] = $capitalised;
            $highlighted[] = str_replace($suggestedWord, $capitalised, $highlightedWord);
        } else {
            // No need to capitalise, so add the suggested word and its highlighted form
            $plain[] = $suggestedWord;
            $highlighted[] = $highlightedWord;
        }
    }

    // Put the real highlight tags back in place of the markers
    $highlightedQuery = ' ' . implode(' ', $highlighted) . ' ';
    $highlightedQuery = str_replace(self::$pre_marker, ' ' . $preTags, $highlightedQuery);
    $highlightedQuery = str_replace(self::$post_marker, $postTags . ' ', $highlightedQuery);

    return array(
        'suggestedQuery' => implode(' ', $plain),
        'suggestedQueryHighlighted' => trim($highlightedQuery),
    );
}
||
131 | |||
132 | |||
/**
 * Parse the explanation string of a more like this query into a map of field names to the
 * terms listed for that field.
 *
 * The output format of the explanation is not documented, so at best this is guess work to an
 * extent. Formats handled are:
 * - ((Title.standard:great Content.standard:ammunition Content.standard:could
 *   Content.standard:zealand Content.standard:new)~3) -ConstantScore(_uid:GutenbergBookExtract#1519)
 * - (Description: bay Description: mannerstram)
 *
 * Anything from '-ConstantScore' onwards is ignored, as are the wrapping brackets and a
 * trailing ')~N)' suffix. A field name followed by a colon and a space takes the next word as
 * its term.
 *
 * @param string $explanation explanation string for more like this terms from Elasticsearch
 * @return array Array of fieldnames mapped to terms, e.g.
 *         array('Title.standard' => array('great'), 'Content.standard' => array('new'))
 */
public static function parseSuggestionExplanation($explanation) {
    // Only the part before the '-ConstantScore(...)' clause lists the terms
    $sections = explode('-ConstantScore', (string) $explanation);
    $termList = trim($sections[0]);

    // Cut off the ')~N)' suffix when present. Locating it explicitly avoids the length
    // arithmetic that used to clip the last character of the final term
    $suffixPos = strpos($termList, ')~');
    if ($suffixPos !== false) {
        $termList = substr($termList, 0, $suffixPos);
    }

    // Remove the wrapping brackets, however many there are, plus stray whitespace
    $termList = trim($termList, "() \t\n\r");

    $terms = array();

    // Field whose term is expected in the next token ('Description: bay' format)
    $pendingField = null;

    foreach (preg_split('/\s+/', $termList, -1, PREG_SPLIT_NO_EMPTY) as $token) {
        if (strpos($token, ':') === false) {
            // This is the no terms case: a bare word that belongs to no field
            if ($pendingField === null) {
                break;
            }

            $terms[$pendingField][] = $token;
            $pendingField = null;
            continue;
        }

        // Limit of 2 keeps any further colons as part of the term
        list($fieldname, $term) = explode(':', $token, 2);

        if ($term === '') {
            // 'Field:' on its own, the term follows as the next token
            $pendingField = $fieldname;
            continue;
        }

        $pendingField = null;
        $terms[$fieldname][] = $term;
    }

    return $terms;
}
||
184 | |||
185 | /** |
||
186 | * Add attributes necessary for jQuery to execute autocomplete |
||
187 | * @param FormField &$queryField field used to type a search query |
||
188 | */ |
||
189 | public static function addAutocompleteToQueryField(&$queryField, $classesToSearch, $siteTreeOnly, $link, $slug) { |
||
197 | |||
198 | /** |
||
199 | * @return function print content to either web browser or command line. Can be optionally suppressed |
||
200 | */ |
||
201 | 1 | public static function getPrinter() { |
|
209 | |||
210 | /** |
||
211 | * Set to true to show output on the command line or browser, false to not |
||
212 | * |
||
213 | * @param boolean $new_cli_printer_output true to show output, false to hide it |
||
214 | */ |
||
215 | public static function setPrinterOutput($new_cli_printer_output) { |
||
218 | } |
||
219 |
When comparing two booleans, it is generally considered safer to use the strict comparison operator.