1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Licensed under The GPL-3.0 License |
4
|
|
|
* For full copyright and license information, please see the LICENSE.txt |
5
|
|
|
* Redistributions of files must retain the above copyright notice. |
6
|
|
|
* |
7
|
|
|
* @since 2.0.0 |
8
|
|
|
* @author Christopher Castro <[email protected]> |
9
|
|
|
* @link http://www.quickappscms.org |
10
|
|
|
* @license http://opensource.org/licenses/gpl-3.0.html GPL-3.0 License |
11
|
|
|
*/ |
12
|
|
|
namespace Search\Engine\Generic; |
13
|
|
|
|
14
|
|
|
use Cake\Cache\Cache; |
15
|
|
|
use Cake\Core\InstanceConfigTrait; |
16
|
|
|
use Cake\Datasource\EntityInterface; |
17
|
|
|
use Cake\Error\FatalErrorException; |
18
|
|
|
use Cake\Event\Event; |
19
|
|
|
use Cake\Event\EventManager; |
20
|
|
|
use Cake\ORM\Query; |
21
|
|
|
use Cake\ORM\Table; |
22
|
|
|
use Cake\Utility\Hash; |
23
|
|
|
use Cake\Utility\Inflector; |
24
|
|
|
use Search\Engine\BaseEngine; |
25
|
|
|
use Search\Engine\Generic\Exception\CompoundPrimaryKeyException; |
26
|
|
|
use Search\Parser\MiniLanguage\MiniLanguageParser; |
27
|
|
|
use Search\Parser\TokenInterface; |
28
|
|
|
use \ArrayObject; |
29
|
|
|
|
30
|
|
|
/** |
31
|
|
|
* This Search Engine allows entities to be searchable through an auto-generated |
32
|
|
|
* list of words. |
33
|
|
|
* |
34
|
|
|
* ## Using Generic Engine |
35
|
|
|
* |
36
|
|
|
* You must indicate Searchable behavior to use this engine, for example when |
37
|
|
|
* attaching Searchable behavior to `Articles` table: |
38
|
|
|
* |
39
|
|
|
* ```php |
40
|
|
|
* $this->addBehavior('Search.Searchable', [ |
41
|
|
|
* 'engine' => [ |
42
|
|
|
* 'className' => 'Search\Engine\Generic\GenericEngine', |
43
|
|
|
* 'config' => [ |
44
|
|
|
* 'bannedWords' => [] |
45
|
|
|
* ] |
46
|
|
|
* ] |
47
|
|
|
* ]); |
48
|
|
|
* ``` |
49
|
|
|
* |
50
|
|
|
* This engine will apply a series of filters (converts to lowercase, remove line |
51
|
|
|
* breaks, etc) to words list extracted from each entity being indexed. |
52
|
|
|
* |
53
|
|
|
* ### Banned Words |
54
|
|
|
* |
55
|
|
|
* You can use the `bannedWords` option to tell which words should not be indexed by |
56
|
|
|
* this engine. For example: |
57
|
|
|
* |
58
|
|
|
* ```php |
59
|
|
|
* $this->addBehavior('Search.Searchable', [ |
60
|
|
|
* 'engine' => [ |
61
|
|
|
* 'className' => 'Search\Engine\Generic\GenericEngine', |
62
|
|
|
* 'config' => [ |
63
|
|
|
* 'bannedWords' => ['of', 'the', 'and'] |
64
|
|
|
* ] |
65
|
|
|
* ] |
66
|
|
|
* ]); |
67
|
|
|
* ``` |
68
|
|
|
* |
69
|
|
|
* If you need to ban a really specific list of words you can set `bannedWords` |
70
|
|
|
* option as a callable method that should return true or false to tell if a word |
71
|
|
|
* should be indexed or not. For example: |
72
|
|
|
* |
73
|
|
|
* ```php |
74
|
|
|
* $this->addBehavior('Search.Searchable', [ |
75
|
|
|
* 'engine' => [ |
76
|
|
|
* 'className' => 'Search\Engine\Generic\GenericEngine', |
77
|
|
|
* 'config' => [ |
78
|
|
|
* 'bannedWords' => function ($word) { |
79
|
|
|
* return strlen($word) > 3; |
80
|
|
|
* } |
81
|
|
|
* ] |
82
|
|
|
* ] |
83
|
|
|
* ]); |
84
|
|
|
* ``` |
85
|
|
|
* |
86
|
|
|
* - Returning TRUE indicates that the word is safe for indexing (not banned). |
87
|
|
|
* - Returning FALSE indicates that the word should NOT be indexed (banned). |
88
|
|
|
* |
89
|
|
|
* In the example above, any word of 4 or more characters will be indexed |
90
|
|
|
* (e.g. "home", "name", "quickapps", etc). Any word of 3 or less characters will |
91
|
|
|
* be banned (e.g. "and", "or", "the"). |
92
|
|
|
* |
93
|
|
|
* ## Searching Entities |
94
|
|
|
* |
95
|
|
|
* When using this engine, every entity under your table gets a list of indexed |
96
|
|
|
* words. The idea behind this is that you can use this list of words to locate any |
97
|
|
|
* entity based on a customized search-criteria. A search-criteria looks as follows: |
98
|
|
|
* |
99
|
|
|
* "this phrase" OR -"not this one" AND this |
100
|
|
|
* |
101
|
|
|
* --- |
102
|
|
|
* |
103
|
|
|
* Use wildcard searches to broaden results; asterisk (`*`) matches any one or |
104
|
|
|
* more characters, exclamation mark (`!`) matches any single character: |
105
|
|
|
* |
106
|
|
|
* "this *rase" OR -"not th!! one" AND thi! |
107
|
|
|
* |
108
|
|
|
* Anything containing space (" ") characters must be wrapped between quotation |
109
|
|
|
* marks: |
110
|
|
|
* |
111
|
|
|
* "this phrase" special_operator:"[100 to 500]" -word -"more words" -word_1 word_2 |
112
|
|
|
* |
113
|
|
|
* The search criteria above will be treated as if it were composed of the |
114
|
|
|
* following parts: |
115
|
|
|
* |
116
|
|
|
* - `this phrase` |
117
|
|
|
* - `special_operator:[100 to 500]` |
118
|
|
|
* - `-word` |
119
|
|
|
* - `-more words` |
120
|
|
|
* - `-word_1` |
121
|
|
|
* - `word_2` |
122
|
|
|
* |
123
|
|
|
* --- |
124
|
|
|
* |
125
|
|
|
* Search criteria allows you to perform complex search conditions in a |
126
|
|
|
* human-readable way. Allows you, for example, create user-friendly search-forms, |
127
|
|
|
* or create some RSS feed just by creating a friendly URL using a search-criteria. |
128
|
|
|
* e.g.: `http://example.com/rss/category:art date:>2014-01-01` |
129
|
|
|
* |
130
|
|
|
* You must use the `search()` method to scope any query using a search-criteria. |
131
|
|
|
* For example, in one controller using `Users` model: |
132
|
|
|
* |
133
|
|
|
* ```php |
134
|
|
|
* $criteria = '"this phrase" OR -"not this one" AND this'; |
135
|
|
|
* $query = $this->Users->find(); |
136
|
|
|
* $query = $this->Users->search($criteria, $query); |
137
|
|
|
* ``` |
138
|
|
|
* |
139
|
|
|
* The above will alter the given $query object according to the given criteria. |
140
|
|
|
* The second argument (query object) is optional, if not provided this Behavior |
141
|
|
|
* automatically generates a find-query for you. Previous example and the one |
142
|
|
|
* below are equivalent: |
143
|
|
|
* |
144
|
|
|
* ```php |
145
|
|
|
* $criteria = '"this phrase" OR -"not this one" AND this'; |
146
|
|
|
* $query = $this->Users->search($criteria); |
147
|
|
|
* ``` |
148
|
|
|
*/ |
149
|
|
|
class GenericEngine extends BaseEngine |
150
|
|
|
{ |
151
|
|
|
|
152
|
|
|
/**
 * {@inheritDoc}
 *
 * Default configuration values for this engine:
 *
 * - operators: A list of registered operator methods as `name` => `methodName`.
 *
 * - strict: Used to filter out invalid words. Set it to a string holding a
 *   regular expression that describes which characters should be removed, or
 *   set it to TRUE to use the built-in discard criteria: only letters, digits
 *   and a few basic symbols (".", ",", "/", etc) are kept. Defaults to TRUE.
 *
 * - bannedWords: Array list of banned words, or a callable that decides whether
 *   a given word is banned or not. Defaults to an empty array (allow
 *   everything).
 *
 * - fulltext: Whether to use FULLTEXT search whenever it is possible. Defaults
 *   to TRUE. This feature is only supported for MySQL InnoDB database engines.
 *
 * - datasetTable: Name of the MySQL table where the words dataset should be
 *   stored and read from. This allows you to split large sets into different
 *   tables.
 */
protected $_defaultConfig = [
    'operators' => [],
    'strict' => true,
    'bannedWords' => [],
    'fulltext' => true,
    'datasetTable' => 'search_datasets',
];
181
|
|
|
|
182
|
|
|
/**
 * {@inheritDoc}
 *
 * Derives the `tableAlias` and `pk` options from the given table, attaches the
 * `SearchDatasets` association to it and points that association at the
 * configured dataset table before delegating to the parent constructor.
 *
 * @param \Cake\ORM\Table $table The table this engine indexes
 * @param array $config Engine options, see `$_defaultConfig`
 * @throws \Search\Engine\Generic\Exception\CompoundPrimaryKeyException When using
 *   compound primary keys
 */
public function __construct(Table $table, array $config = [])
{
    $config['tableAlias'] = (string)Inflector::underscore($table->table());
    $config['pk'] = $table->primaryKey();
    if (is_array($config['pk'])) {
        throw new CompoundPrimaryKeyException($config['tableAlias']);
    }

    $table->hasOne('Search.SearchDatasets', [
        'foreignKey' => 'entity_id',
        'joinType' => 'INNER',
        'conditions' => [
            'SearchDatasets.table_alias' => $config['tableAlias'],
        ],
        'dependent' => true,
    ]);

    // The parent constructor has not run yet at this point. It is presumably
    // what assigns `$this->_table` and applies `$config` (not visible here), so
    // neither `$this->_table` nor `$this->config()` can be relied upon. Resolve
    // the dataset table name from the raw options instead, falling back to the
    // declared default.
    $datasetTable = isset($config['datasetTable']) ?
        $config['datasetTable'] :
        $this->_defaultConfig['datasetTable'];
    $table->SearchDatasets->table($datasetTable);

    parent::__construct($table, $config);
}
208
|
|
|
|
209
|
|
|
/**
 * {@inheritDoc}
 *
 * Looks up the dataset row matching this entity's id and the configured table
 * alias, creates a new one when none exists, stores the freshly extracted
 * words in it and saves it.
 *
 * @param \Cake\Datasource\EntityInterface $entity The entity to index
 * @return bool Result of the save operation cast to boolean
 */
public function index(EntityInterface $entity)
{
    $datasets = $this->_table->SearchDatasets;
    $identity = [
        'entity_id' => $this->_entityId($entity),
        'table_alias' => $this->config('tableAlias'),
    ];

    $record = $datasets->find()
        ->where($identity)
        ->limit(1)
        ->first();

    if (!$record) {
        $record = $datasets->newEntity($identity + ['words' => '']);
    }

    // Leading and trailing spaces allow LIKE %something-to-match% lookups
    $words = ' ' . $this->_extractEntityWords($entity) . ' ';
    $record = $datasets->patchEntity($record, ['words' => $words]);

    return (bool)$datasets->save($record);
}
237
|
|
|
|
238
|
|
|
/**
 * {@inheritDoc}
 *
 * Removes every dataset row that matches this entity's id and the configured
 * table alias.
 *
 * @param \Cake\Datasource\EntityInterface $entity The entity whose index is dropped
 * @return bool Always true
 */
public function delete(EntityInterface $entity)
{
    $conditions = [
        'entity_id' => $this->_entityId($entity),
        'table_alias' => $this->config('tableAlias'),
    ];
    $this->_table->SearchDatasets->deleteAll($conditions);

    return true;
}
250
|
|
|
|
251
|
|
|
/**
 * {@inheritDoc}
 *
 * Fetches the dataset row matching this entity's id and the configured table
 * alias.
 *
 * @param \Cake\Datasource\EntityInterface $entity The entity to look up
 * @return mixed Whatever `first()` yields for the lookup query (presumably the
 *   dataset entity, or null when there is none - confirm against the ORM)
 */
public function get(EntityInterface $entity)
{
    $conditions = [
        'entity_id' => $this->_entityId($entity),
        'table_alias' => $this->config('tableAlias'),
    ];
    $lookup = $this->_table->SearchDatasets->find();
    $lookup = $lookup->where($conditions)->limit(1);

    return $lookup->first();
}
264
|
|
|
|
265
|
|
|
/**
 * {@inheritDoc}
 *
 * Parses the given search-criteria into tokens and applies each of them to the
 * query object. For example, given the criteria below:
 *
 *     "this phrase" -"and not this one"
 *
 * The query object is altered as follows:
 *
 * ```php
 * $query->where([
 *    'indexed_words LIKE' => '%this phrase%',
 *    'indexed_words NOT LIKE' => '%and not this one%'
 * ]);
 * ```
 *
 * The `AND` & `OR` keywords are allowed to create complex conditions. For
 * example:
 *
 *     "this phrase" OR -"and not this one" AND "this"
 *
 * Will produce something like:
 *
 * ```php
 * $query->where(['indexed_words LIKE' => '%this phrase%'])
 *     ->orWhere(['indexed_words NOT LIKE' => '%and not this one%'])
 *     ->andWhere(['indexed_words LIKE' => '%this%']);
 * ```
 *
 * @param string $criteria The search-criteria to parse
 * @param \Cake\ORM\Query $query The query to scope
 * @return \Cake\ORM\Query The given query; left untouched when the criteria
 *   produces no tokens
 */
public function search($criteria, Query $query)
{
    $tokens = (array)(new MiniLanguageParser($criteria))->parse();
    if (empty($tokens)) {
        return $query;
    }

    $query->innerJoinWith('SearchDatasets');
    foreach ($tokens as $token) {
        $query = $token->isOperator() ?
            $this->_scopeOperator($query, $token) :
            $this->_scopeWords($query, $token);
    }

    return $query;
}
312
|
|
|
|
313
|
|
|
/**
 * Scopes the given query using the given operator token by delegating to the
 * table's `applySearchOperator()` method.
 *
 * @param \Cake\ORM\Query $query The query to scope
 * @param \Search\Parser\TokenInterface $token Token describing an operator.
 *   e.g `-op_name:op_value`
 * @return \Cake\ORM\Query Scoped query
 */
protected function _scopeOperator(Query $query, TokenInterface $token)
{
    $scoped = $this->_table->applySearchOperator($query, $token);

    return $scoped;
}
324
|
|
|
|
325
|
|
|
/**
 * Scopes the given query using the given words token.
 *
 * Delegates to the fulltext variant when fulltext search is enabled, otherwise
 * adds a `LIKE` / `NOT LIKE` condition over the indexed words column.
 *
 * @param \Cake\ORM\Query $query The query to scope
 * @param \Search\Parser\TokenInterface $token Token describing a words
 *   sequence. e.g `this is a phrase`
 * @return \Cake\ORM\Query Scoped query
 */
protected function _scopeWords(Query $query, TokenInterface $token)
{
    if ($this->_isFullTextEnabled()) {
        return $this->_scopeWordsInFulltext($query, $token);
    }

    $operator = $token->negated() ? 'NOT LIKE' : 'LIKE';

    // `*` matches any one or more characters, `!` matches any single character.
    $pattern = '%' . str_replace(['*', '!'], ['%', '_'], $token->value()) . '%';
    $conditions = ["SearchDatasets.words {$operator}" => $pattern];

    // Pick the query method from the token's conjunction; anything that is not
    // exactly 'or' or 'and' falls back to a plain where().
    $methods = ['or' => 'orWhere', 'and' => 'andWhere'];
    $conjunction = $token->where();
    $method = 'where';
    if (is_string($conjunction) && isset($methods[$conjunction])) {
        $method = $methods[$conjunction];
    }
    $query->{$method}($conditions);

    return $query;
}
357
|
|
|
|
358
|
|
|
/**
 * Similar to "_scopeWords" but using MySQL's fulltext indexes.
 *
 * The search term is passed to the database as a bound parameter rather than
 * being interpolated into the SQL string, so characters such as a trailing
 * backslash cannot break out of the `AGAINST()` literal.
 *
 * @param \Cake\ORM\Query $query The query to scope
 * @param \Search\Parser\TokenInterface $token Token describing a words
 *   sequence. e.g `this is a phrase`
 * @return \Cake\ORM\Query Scoped query; returned untouched when the term is
 *   empty or is one of the storage engine's stopwords
 */
protected function _scopeWordsInFulltext(Query $query, TokenInterface $token)
{
    // Both wildcard symbols are mapped to `*` here.
    $value = str_replace(['*', '!'], ['*', '*'], $token->value());
    // A leading `+` is dropped from the term.
    $value = mb_strpos($value, '+') === 0 ? mb_substr($value, 1) : $value;

    if (empty($value) || in_array($value, $this->_stopWords(), true)) {
        return $query;
    }

    $not = $token->negated() ? 'NOT' : '';
    // Kept from the previous implementation so existing terms keep matching the
    // same way; it is no longer what protects the statement.
    $value = str_replace("'", '"', $value);

    // Ask the query's value binder for a unique placeholder so several word
    // tokens can be applied to the same query without clashing.
    $placeholder = $query->valueBinder()->placeholder('fulltext');
    $query->bind($placeholder, $value, 'string');
    $conditions = ["{$not} MATCH(SearchDatasets.words) AGAINST({$placeholder} IN BOOLEAN MODE) > 0"];

    if ($token->where() === 'or') {
        $query->orWhere($conditions);
    } elseif ($token->where() === 'and') {
        $query->andWhere($conditions);
    } else {
        $query->where($conditions);
    }

    return $query;
}
388
|
|
|
|
389
|
|
|
/**
 * Whether FullText index is available or not and should be used.
 *
 * Fulltext is used only when the `fulltext` option is on, the table's
 * connection driver class is named `mysql`, and the dataset table's schema
 * has a `fulltext` index covering the `words` column.
 *
 * The outcome is memoized per connection and dataset table. A single shared
 * flag would make the first engine checked decide for every other engine
 * instance, even those using a different `datasetTable` or connection.
 *
 * @return bool True if enabled and should be used, false otherwise
 */
protected function _isFullTextEnabled()
{
    if (!$this->config('fulltext')) {
        return false;
    }

    static $enabled = [];

    $datasets = $this->_table->SearchDatasets;
    $connection = $this->_table->connection();
    $cacheKey = $connection->configName() . ':' . $datasets->table();
    if (isset($enabled[$cacheKey])) {
        return $enabled[$cacheKey];
    }

    // Assume "not available" until a matching index is found.
    $enabled[$cacheKey] = false;

    list(, $driverClass) = namespaceSplit(strtolower(get_class($connection->driver())));
    if ($driverClass !== 'mysql') {
        return false;
    }

    $schema = $datasets->schema();
    foreach ($schema->indexes() as $index) {
        $info = $schema->index($index);
        if (in_array('words', $info['columns'], true) &&
            strtolower($info['type']) === 'fulltext'
        ) {
            $enabled[$cacheKey] = true;
            break;
        }
    }

    return $enabled[$cacheKey];
}
428
|
|
|
|
429
|
|
|
/**
 * Gets the list of the storage engine's stopwords, that is, words considered
 * common or trivial enough to be omitted from the search index and ignored in
 * search queries.
 *
 * The list is read from `INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD` and
 * cached under the `_cake_model_` cache configuration, keyed by the
 * connection's config name. An empty cached list counts as a cache hit, so
 * the database is not queried again on every call when no stopwords exist.
 *
 * @return array List of words
 */
protected function _stopWords()
{
    $conn = $this->_table->find()->connection();
    $cacheKey = $conn->configName() . '_generic_engine_stopwords_list';

    $cache = Cache::read($cacheKey, '_cake_model_');
    // Only a missing entry is a miss; `[]` is a legitimate cached value.
    if ($cache !== false && $cache !== null) {
        return (array)$cache;
    }

    $words = [];
    $rows = $conn
        ->execute('SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD')
        ->fetchAll('assoc');

    foreach ((array)$rows as $row) {
        if (!empty($row['value'])) {
            $words[] = $row['value'];
        }
    }

    Cache::write($cacheKey, $words, '_cake_model_');

    return $words;
}
459
|
|
|
|
460
|
|
|
/**
 * Reads the entity's primary key value, using the column name stored in the
 * `pk` configuration option.
 *
 * @param \Cake\Datasource\EntityInterface $entity The entity
 * @return string
 * @deprecated Use direct access as `$entity->get($this->config('pk'))`
 */
protected function _entityId(EntityInterface $entity)
{
    $primaryKey = $this->config('pk');

    return $entity->get($primaryKey);
}
471
|
|
|
|
472
|
|
|
/**
 * Extracts the list of words to be indexed for the given entity.
 *
 * Every string or numeric value found in the flattened entity array is
 * concatenated, then cleaned up: invalid byte sequences are dropped, line
 * breaks become spaces, HTML tags are stripped, characters rejected by the
 * `strict` option are replaced by spaces, whitespace is collapsed, the text is
 * lowercased and banned words are filtered out.
 *
 * NOTE: Words can be repeated, this allows to search phrases.
 *
 * @param \Cake\Datasource\EntityInterface $entity The entity for which generate
 *  the list of words
 * @return string Space-separated list of words. e.g. `cat dog this that`
 */
protected function _extractEntityWords(EntityInterface $entity)
{
    $text = '';
    foreach (Hash::flatten($entity->toArray()) as $value) {
        if (is_string($value) || is_numeric($value)) {
            $text .= " {$value}";
        }
    }

    // Drop invalid byte sequences BEFORE any regex work: with the `u` modifier
    // preg_replace() returns NULL on malformed UTF-8, which would otherwise
    // wipe out the whole text.
    $text = (string)iconv('UTF-8', 'UTF-8//IGNORE', mb_convert_encoding($text, 'UTF-8'));

    // Line breaks become spaces so that the last word of one line and the
    // first word of the next one are not glued together.
    $text = str_replace(["\n", "\r"], ' ', trim($text));
    $text = strip_tags($text); // remove HTML tags, but keep their content
    $strict = $this->config('strict');

    if (!empty($strict)) {
        // only: space, digits (0-9), letters (any language), "@", ".", ",", "-", "_", "/", "\"
        // Digits are covered by \p{N}. The backslash needs four backslashes
        // here: a trailing `\\0-9` would reach PCRE as the range NUL..9 and
        // let most ASCII punctuation through.
        $pattern = is_string($strict) ? $strict : '[^\p{L}\p{N}\s\@\.\,\-\_\/\\\\]';
        $filtered = preg_replace('/' . $pattern . '/ui', ' ', $text);
        if ($filtered !== null) {
            $text = $filtered;
        }
    }

    // Collapse every whitespace run (including single tabs) into one space so
    // that words can later be split on " ".
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed !== null) {
        $text = $collapsed;
    }

    $text = mb_strtolower(trim($text)); // all to lowercase
    $text = $this->_filterText($text); // filter

    return trim($text);
}
509
|
|
|
|
510
|
|
|
/**
 * Removes any invalid word from the given text.
 *
 * The `bannedWords` option decides which words are dropped:
 *
 * - When it is a callable, it follows the contract documented for this engine:
 *   returning TRUE means the word is safe for indexing, returning FALSE means
 *   the word is banned. The result is therefore negated to decide on removal.
 * - Otherwise it is treated as a list of banned words; empty strings are
 *   dropped as well.
 *
 * @param string $text The text to filter, as a space-separated list of words
 * @return string Filtered text
 */
protected function _filterText($text)
{
    $banned = $this->config('bannedWords');

    if (is_callable($banned)) {
        $isBanned = function ($word) use ($banned) {
            // callable answers "may this word be indexed?"
            return !$banned($word);
        };
    } else {
        $list = array_map('strval', (array)$banned);
        $isBanned = function ($word) use ($list) {
            // Compare strictly and test for the empty string explicitly, so
            // the word "0" is kept and numeric-looking words do not match
            // each other loosely.
            return $word === '' || in_array($word, $list, true);
        };
    }

    $kept = [];
    foreach (explode(' ', $text) as $word) {
        if (!$isBanned($word)) {
            $kept[] = $word;
        }
    }

    return implode(' ', $kept);
}
540
|
|
|
} |
541
|
|
|
|
This method has been deprecated. The supplier of the class has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the method will be removed from the class and what other method or class to use instead.