Completed
Push — development ( 7e0ebb...9492b7 )
by Thomas
20:59 queued 14:58
created

ftsearch.inc.php ➔ ftsearch_refresh_all_cache_descs()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 28
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nc 2
nop 0
dl 0
loc 28
rs 8.8571
c 0
b 0
f 0
1
<?php
2
/****************************************************************************
3
 * For license information see LICENSE.md
4
 *
5
 *
6
 * functions for the full text search-engine
7
 ****************************************************************************/
8
9
/* begin conversion rules */

// Each entry is a [pattern, replacement] pair. ftsearch_text2simple() passes
// them to mb_ereg_replace() in the order listed here, so the first element is
// a regular expression (see the anchored '^ch') and earlier rules change what
// later rules get to see (e.g. 'egg' is listed before 'eg').
$ftSearchSimpleRules[] = ['qu', 'k'];
$ftSearchSimpleRules[] = ['ts', 'z'];
$ftSearchSimpleRules[] = ['tz', 'z'];
$ftSearchSimpleRules[] = ['alp', 'alb'];
$ftSearchSimpleRules[] = ['y', 'i'];
$ftSearchSimpleRules[] = ['ai', 'ei'];
$ftSearchSimpleRules[] = ['ou', 'u'];
$ftSearchSimpleRules[] = ['th', 't'];
$ftSearchSimpleRules[] = ['ph', 'f'];
$ftSearchSimpleRules[] = ['oh', 'o'];
$ftSearchSimpleRules[] = ['ah', 'a'];
$ftSearchSimpleRules[] = ['eh', 'e'];
$ftSearchSimpleRules[] = ['aux', 'o'];
$ftSearchSimpleRules[] = ['eau', 'o'];
$ftSearchSimpleRules[] = ['eux', 'oe'];
$ftSearchSimpleRules[] = ['^ch', 'sch'];
$ftSearchSimpleRules[] = ['ck', 'k'];
$ftSearchSimpleRules[] = ['ie', 'i'];
$ftSearchSimpleRules[] = ['ih', 'i'];
$ftSearchSimpleRules[] = ['ent', 'end'];
$ftSearchSimpleRules[] = ['uh', 'u'];
$ftSearchSimpleRules[] = ['sh', 'sch'];
$ftSearchSimpleRules[] = ['ver', 'wer'];
$ftSearchSimpleRules[] = ['dt', 't'];
$ftSearchSimpleRules[] = ['hard', 'hart'];
$ftSearchSimpleRules[] = ['egg', 'ek'];
$ftSearchSimpleRules[] = ['eg', 'ek'];
$ftSearchSimpleRules[] = ['cr', 'kr'];
$ftSearchSimpleRules[] = ['ca', 'ka'];
$ftSearchSimpleRules[] = ['ce', 'ze'];
$ftSearchSimpleRules[] = ['x', 'ks'];
$ftSearchSimpleRules[] = ['ve', 'we'];
$ftSearchSimpleRules[] = ['va', 'wa'];

/* end conversion rules */
145
146
/**
 * Tokenise a text and turn every remaining word into a CRC32 hash string.
 *
 * The text is split with ftsearch_split() in "simple" mode. Words of at most
 * two bytes are dropped; each longer word is replaced by its crc32() value
 * formatted as an unsigned decimal string. The array keys of the surviving
 * words are kept exactly as ftsearch_split() returned them.
 *
 * @param string $str text to hash; passed by reference and handed on to
 *                    ftsearch_split(), which overwrites the variable
 *
 * @return array unsigned CRC32 values as decimal strings
 */
function ftsearch_hash(&$str)
{
    $hashes = [];

    foreach (ftsearch_split($str, true) as $index => $word) {
        // words of one or two bytes are not indexed
        if (strlen($word) <= 2) {
            continue;
        }

        $hashes[$index] = sprintf("%u", crc32($word));
    }

    return $hashes;
}
164
165
/**
 * Split a text into words, dropping ignored and empty words.
 *
 * Punctuation and line breaks are turned into blanks, runs of blanks are
 * collapsed and the text is split on single blanks. Every word found in the
 * ignore list (loaded via ftsearch_load_ignores()) is removed. With $simple
 * set, each remaining word is additionally passed through
 * ftsearch_text2simple(). Words that end up empty are removed as well.
 *
 * The keys of the returned array are the word positions after splitting;
 * removed words leave gaps, the array is not re-indexed.
 *
 * @param string $str    text to split; passed by reference and set to '' once
 *                       it has been split
 * @param bool   $simple whether to simplify each word with
 *                       ftsearch_text2simple()
 *
 * @return array remaining words, keyed by their original position
 */
function ftsearch_split(&$str, $simple)
{
    global $ftsearch_ignores;

    // punctuation and line breaks act as word separators
    $separators = ['\\?', '\\)', '\\(', '\\.', '´', '`', '\'', '/', ':', ',', "\r\n", "\n", "\r"];
    foreach ($separators as $separator) {
        $str = mb_ereg_replace($separator, ' ', $str);
    }

    // collapse every run of blanks into a single blank in one pass
    $str = mb_ereg_replace(' +', ' ', $str);

    $words = mb_split(' ', $str);

    // the by-reference argument is cleared, as the original implementation did
    $str = '';

    ftsearch_load_ignores();
    foreach ($words as $position => $word) {
        // Ignored word? Compare strictly so that numeric-looking strings such
        // as '1e3' and '1000' are not treated as equal.
        if (in_array(mb_strtolower($word), $ftsearch_ignores, true)) {
            unset($words[$position]);
            continue;
        }

        if ($simple) {
            $word = ftsearch_text2simple($word);
            $words[$position] = $word;
        }

        // drop words that are (or became) empty
        if ((string) $word === '') {
            unset($words[$position]);
        }
    }

    return $words;
}
218
219
/**
 * Load the list of ignored words into the global $ftsearch_ignores.
 *
 * The words are read from the `search_ignore` table once; the global flag
 * $ftsearch_ignores_loaded makes every later call return immediately.
 */
function ftsearch_load_ignores()
{
    global $ftsearch_ignores, $ftsearch_ignores_loaded;

    // already loaded during this request
    if ($ftsearch_ignores_loaded == true) {
        return;
    }

    $ftsearch_ignores = [];

    $result = sql('SELECT `word` FROM `search_ignore`');
    while ($row = sql_fetch_assoc($result)) {
        $ftsearch_ignores[] = $row['word'];
    }
    sql_free_result($result);

    $ftsearch_ignores_loaded = true;
}
236
237
/**
 * Reduce a single word to its simplified form.
 *
 * The word is first normalised by ftsearch_text2sort(). Then every
 * [pattern, replacement] pair of the global $ftSearchSimpleRules is applied in
 * order with mb_ereg_replace(), and finally doubled letters a-z are squeezed
 * into a single letter.
 *
 * @param string $str a single word
 *
 * @return mixed|string the simplified word
 */
function ftsearch_text2simple($str)
{
    global $ftSearchSimpleRules;

    $simple = ftsearch_text2sort($str);

    // apply the conversion rules in their listed order
    foreach ($ftSearchSimpleRules as $pair) {
        list($pattern, $replacement) = $pair;
        $simple = mb_ereg_replace($pattern, $replacement, $simple);
    }

    // replace doubled characters; repeat per letter until nothing changes
    foreach (range('a', 'z') as $letter) {
        do {
            $before = $simple;
            $simple = mb_ereg_replace($letter . $letter, $letter, $simple);
        } while ($before !== $simple);
    }

    return $simple;
}
266
267
/**
 * Normalise a single word to lower-case ASCII letters.
 *
 * The word is lower-cased, German umlauts and a set of accented letters and
 * ligatures are transliterated, and finally every character that is not an
 * ASCII letter or a blank is removed.
 *
 * Changes compared to the previous implementation:
 *  - The rules 'c' => 'c' and 'u' => 'u' were no-ops. They sat between the
 *    other Czech letters (e, s, z with caron, y with acute) and are presumably
 *    mangled 'č' and 'ů', which are transliterated here. NOTE(review): confirm
 *    the intended characters; words containing them now normalise differently,
 *    so existing index entries for such words need a refresh.
 *  - Rules for upper-case letters were removed: they could never match because
 *    the input is lower-cased first.
 *  - The transliteration is done in one strtr() pass instead of one regex
 *    pass per letter. No replacement output can be matched by another rule
 *    (all keys are non-ASCII, all outputs ASCII), so the result is the same.
 *
 * @param string $str a single word, expected to be UTF-8
 *
 * @return string the normalised word
 */
function ftsearch_text2sort($str)
{
    static $transliteration = [
        // German
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'ß' => 'ss',

        // accents etc.
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'è' => 'e',
        'é' => 'e',
        'ë' => 'e',
        'ê' => 'e',
        'ě' => 'e',
        'ô' => 'o',
        'ó' => 'o',
        'ò' => 'o',
        'û' => 'u',
        'ů' => 'u',
        'ç' => 'c',
        'č' => 'c',
        'ć' => 'c',
        'î' => 'i',
        'ï' => 'i',
        'ì' => 'i',
        'í' => 'i',
        'ł' => 'l',
        'š' => 's',
        'ý' => 'y',
        'ž' => 'z',

        // ligatures
        'æ' => 'ae',
        'œ' => 'oe',
    ];

    // the map above is written in UTF-8, so lower-case as UTF-8 explicitly
    $str = mb_strtolower($str, 'UTF-8');
    $str = strtr($str, $transliteration);

    // Everything else: strip all bytes that are not ASCII letters or blanks.
    // Working byte-wise removes whole multi-byte characters as well and cannot
    // fail on malformed UTF-8.
    return (string) preg_replace('/[^A-Za-z ]/', '', $str);
}
324
325
/**
 * Refresh the full text search index for all indexed object kinds.
 *
 * Runs the four refresh passes one after another: cache names, cache
 * descriptions, picture titles and cache log texts.
 */
function ftsearch_refresh()
{
    // cache names
    ftsearch_refresh_all_caches();

    // cache descriptions
    ftsearch_refresh_all_cache_descs();

    // picture titles
    ftsearch_refresh_all_pictures();

    // log texts
    ftsearch_refresh_all_cache_logs();
}
332
333
/**
 * Re-index the names of all caches whose index entry is missing or outdated.
 *
 * Selects every cache with `status` other than 5 that either has no row in
 * `search_index_times` for object type 2, or whose `last_modified` is newer
 * than the recorded `last_refresh`, and calls ftsearch_refresh_cache() for
 * each of them.
 */
function ftsearch_refresh_all_caches()
{
    $query =
        'SELECT `caches`.`cache_id`
         FROM `caches`
         LEFT JOIN `search_index_times`
             ON `caches`.`cache_id`=`search_index_times`.`object_id`
             AND 2 =`search_index_times`.`object_type`
         WHERE `caches`.`status`!= 5
         AND ISNULL(`search_index_times`.`object_id`)
         UNION DISTINCT
         SELECT `caches`.`cache_id`
         FROM `caches`
         INNER JOIN `search_index_times`
             ON `search_index_times`.`object_type`=2
             AND `caches`.`cache_id`=`search_index_times`.`object_id`
         WHERE `caches`.`last_modified` > `search_index_times`.`last_refresh`
             AND `caches`.`status`!=5';

    $result = sql($query);
    while ($row = sql_fetch_assoc($result)) {
        ftsearch_refresh_cache($row['cache_id']);
    }
    sql_free_result($result);
}
357
358
/**
 * Re-index the name of a single cache (object type 2).
 *
 * Reads the cache's `name` and `last_modified` and stores the index entries
 * through ftsearch_set_entries(). Nothing is indexed when the cache is not
 * found.
 *
 * NOTE: a static-analysis report flagged this function as duplicated
 * elsewhere in the project.
 *
 * @param $cache_id id of the cache; used both as object id and as cache id
 */
function ftsearch_refresh_cache($cache_id)
{
    $result = sql("SELECT `name`, `last_modified` FROM `caches` WHERE `cache_id`='&1'", $cache_id);

    $row = sql_fetch_assoc($result);
    if ($row) {
        ftsearch_set_entries(2, $cache_id, $cache_id, $row['name'], $row['last_modified']);
    }

    sql_free_result($result);
}
369
370
371
/**
 * Re-index all cache descriptions whose index entry is missing or outdated.
 *
 * Selects every `cache_desc` row belonging to a cache with `status` other
 * than 5 that either has no row in `search_index_times` for object type 3, or
 * whose `last_modified` is newer than the recorded `last_refresh`, and calls
 * ftsearch_refresh_cache_desc() for each of them.
 */
function ftsearch_refresh_all_cache_descs()
{
    $query =
        'SELECT `cache_desc`.`id`
         FROM `cache_desc`
         INNER JOIN `caches`
             ON `caches`.`cache_id`=`cache_desc`.`cache_id`
         LEFT JOIN `search_index_times`
             ON `cache_desc`.`id`=`search_index_times`.`object_id`
             AND 3=`search_index_times`.`object_type`
         WHERE `caches`.`status`!= 5
             AND ISNULL(`search_index_times`.`object_id`)
         UNION DISTINCT
         SELECT `cache_desc`.`id`
         FROM `cache_desc`
         INNER JOIN `caches`
             ON `caches`.`cache_id`=`cache_desc`.`cache_id`
         INNER JOIN `search_index_times`
             ON `search_index_times`.`object_type` = 3
             AND `cache_desc`.`id`=`search_index_times`.`object_id`
         WHERE `cache_desc`.`last_modified`>`search_index_times`.`last_refresh`
             AND `caches`.`status`!=5';

    $result = sql($query);
    while ($row = sql_fetch_assoc($result)) {
        ftsearch_refresh_cache_desc($row['id']);
    }
    sql_free_result($result);
}
399
400
/**
 * Re-index a single cache description (object type 3).
 *
 * Reads the description and short description (joined by a blank), removes
 * the HTML with ftsearch_strip_html() and stores the index entries through
 * ftsearch_set_entries(). Nothing is indexed when the row is not found.
 *
 * NOTE: a static-analysis report flagged this function as duplicated
 * elsewhere in the project.
 *
 * @param $id id of the `cache_desc` row
 */
function ftsearch_refresh_cache_desc($id)
{
    $query =
        "
        SELECT
          `cache_id`,
          CONCAT(`desc`, ' ', `short_desc`) AS `desc`,
          `last_modified`
        FROM `cache_desc`
        WHERE `id`='&1'";

    $result = sql($query, $id);

    $row = sql_fetch_assoc($result);
    if ($row) {
        $plainText = ftsearch_strip_html($row['desc']);
        ftsearch_set_entries(3, $id, $row['cache_id'], $plainText, $row['last_modified']);
    }

    sql_free_result($result);
}
421
422
/**
 * Re-index all pictures whose index entry is missing or outdated.
 *
 * Selects every picture that either has no row in `search_index_times` for
 * object type 6, or whose `last_modified` is newer than the recorded
 * `last_refresh`, and calls ftsearch_refresh_picture() for each of them.
 */
function ftsearch_refresh_all_pictures()
{
    $query =
        '
        SELECT `pictures`.`id`
        FROM `pictures`
        LEFT JOIN `search_index_times`
            ON `pictures`.`id`=`search_index_times`.`object_id`
            AND 6=`search_index_times`.`object_type`
        WHERE ISNULL(`search_index_times`.`object_id`)
        UNION DISTINCT
        SELECT `pictures`.`id`
        FROM `pictures`
        INNER JOIN `search_index_times`
            ON `search_index_times`.`object_type`= 6
            AND `pictures`.`id`=`search_index_times`.`object_id`
        WHERE `pictures`.`last_modified`>`search_index_times`.`last_refresh`';

    $result = sql($query);
    while ($row = sql_fetch_assoc($result)) {
        ftsearch_refresh_picture($row['id']);
    }
    sql_free_result($result);
}
445
446
/**
 * Re-index the title of a single picture (object type 6).
 *
 * The owning cache is looked up in two ways: pictures with `object_type` 2
 * are joined to `caches` directly, pictures with `object_type` 1 are joined to
 * `cache_logs` to obtain the cache id. The first row found is indexed through
 * ftsearch_set_entries(); nothing is indexed when no row is found.
 *
 * NOTE: a static-analysis report flagged this function as duplicated
 * elsewhere in the project.
 *
 * @param $id id of the picture
 */
function ftsearch_refresh_picture($id)
{
    $query =
        "
        SELECT
            `caches`.`cache_id`,
            `pictures`.`title`,
            `pictures`.`last_modified`
        FROM `pictures`
        INNER JOIN `caches`
            ON `pictures`.`object_type`=2
            AND `caches`.`cache_id`=`pictures`.`object_id`
        WHERE `pictures`.`id`='&1'
        UNION DISTINCT
        SELECT
            `cache_logs`.`cache_id`,
            `pictures`.`title`,
            `pictures`.`last_modified`
        FROM `pictures`
        INNER JOIN `cache_logs`
            ON `pictures`.`object_type`= 1
            AND `cache_logs`.`id`=`pictures`.`object_id`
        WHERE `pictures`.`id`='&1'
        LIMIT 1";

    $result = sql($query, $id);

    $row = sql_fetch_assoc($result);
    if ($row) {
        ftsearch_set_entries(6, $id, $row['cache_id'], $row['title'], $row['last_modified']);
    }

    sql_free_result($result);
}
480
481
/**
 * Re-index all cache logs whose index entry is missing or outdated.
 *
 * Selects every log that either has no row in `search_index_times` for
 * object type 1, or whose `last_modified` is newer than the recorded
 * `last_refresh`, and calls ftsearch_refresh_cache_logs() for each of them.
 */
function ftsearch_refresh_all_cache_logs()
{
    $query =
        '
        SELECT `cache_logs`.`id`
        FROM `cache_logs`
        LEFT JOIN `search_index_times`
            ON `cache_logs`.`id`=`search_index_times`.`object_id`
            AND 1=`search_index_times`.`object_type`
        WHERE ISNULL(`search_index_times`.`object_id`)
        UNION DISTINCT
        SELECT `cache_logs`.`id`
        FROM `cache_logs`
        INNER JOIN `search_index_times`
            ON `search_index_times`.`object_type`= 1
            AND `cache_logs`.`id`=`search_index_times`.`object_id`
        WHERE `cache_logs`.`last_modified`>`search_index_times`.`last_refresh`';

    $result = sql($query);
    while ($row = sql_fetch_assoc($result)) {
        ftsearch_refresh_cache_logs($row['id']);
    }
    sql_free_result($result);
}
504
505 View Code Duplication
/**
 * Re-index the text of a single cache log (object type 1).
 *
 * Reads the log's cache id, text and `last_modified`, removes the HTML with
 * ftsearch_strip_html() and stores the index entries through
 * ftsearch_set_entries(). Nothing is indexed when the log is not found.
 *
 * NOTE: a static-analysis report flagged this function as duplicated
 * elsewhere in the project.
 *
 * @param $id id of the `cache_logs` row
 */
function ftsearch_refresh_cache_logs($id)
{
    $result = sql("SELECT `cache_id`, `text`, `last_modified` FROM `cache_logs` WHERE `id`='&1'", $id);

    $row = sql_fetch_assoc($result);
    if ($row) {
        $plainText = ftsearch_strip_html($row['text']);
        ftsearch_set_entries(1, $id, $row['cache_id'], $plainText, $row['last_modified']);
    }

    sql_free_result($result);
}
514
515
/**
 * Remove the index data stored for an object.
 *
 * Deletes the `search_index` rows matching the object type and cache id, and
 * the `search_index_times` row matching the object type and object id.
 *
 * NOTE(review): the `search_index` delete is filtered by object type and
 * cache id only, not by object id. It looks like this also removes the
 * entries of other objects of the same type that belong to the same cache
 * (e.g. other logs) - confirm whether that is intended.
 *
 * @param $object_type numeric object type of the indexed object
 * @param $object_id   id of the indexed object
 * @param $cache_id    id of the cache the object belongs to
 */
function ftsearch_delete_entries($object_type, $object_id, $cache_id)
{
    // word hashes
    $deleteIndex = "DELETE FROM `search_index` WHERE `object_type`='&1' AND `cache_id`='&2'";
    sql($deleteIndex, $object_type, $cache_id);

    // refresh timestamp
    $deleteTimes = "DELETE FROM `search_index_times` WHERE `object_type`='&1' AND `object_id`='&2'";
    sql($deleteTimes, $object_type, $object_id);
}
525
526
/**
 * Replace the index data of an object with the hashes of the given text.
 *
 * Existing data is removed with ftsearch_delete_entries() first. The text is
 * then hashed with ftsearch_hash() and one `search_index` row is written per
 * hash; a hash that already exists for the key gets its `count` incremented
 * instead. Finally the refresh time of the object is recorded in
 * `search_index_times`.
 *
 * @param $object_type   numeric object type of the indexed object
 * @param $object_id     id of the indexed object
 * @param $cache_id      id of the cache the object belongs to
 * @param $text          text to index; passed by reference and handed on to
 *                       ftsearch_hash()
 * @param $last_modified value stored as `last_refresh` for the object
 */
function ftsearch_set_entries($object_type, $object_id, $cache_id, &$text, $last_modified)
{
    ftsearch_delete_entries($object_type, $object_id, $cache_id);

    $insertHash =
            "INSERT INTO `search_index` (`object_type`, `cache_id`, `hash`, `count`)
            VALUES ('&1', '&2', '&3', '&4') ON DUPLICATE KEY UPDATE `count`=`count`+1";

    foreach (ftsearch_hash($text) as $hash) {
        sql($insertHash, $object_type, $cache_id, $hash, 1);
    }

    $insertTime =
        "INSERT INTO `search_index_times` (`object_id`, `object_type`, `last_refresh`)
        VALUES ('&1', '&2', '&3') ON DUPLICATE KEY UPDATE `last_refresh`='&3'";

    sql($insertTime, $object_id, $object_type, $last_modified);
}
556
557
/**
 * Convert an HTML fragment to plain text for indexing.
 *
 * Line breaks and <br> tags are replaced by blanks so that the words around
 * them stay separated, all remaining tags are removed and HTML entities are
 * decoded as UTF-8.
 *
 * <br> tags are matched case-insensitively and with optional whitespace or
 * attributes (<BR>, <br >, <br class="x"/>). Previously only the exact
 * spellings '<br />', '<br/>' and '<br>' were recognised, so any other variant
 * was removed by strip_tags() without a separator and the neighbouring words
 * were fused.
 *
 * @param string $text HTML text
 *
 * @return string plain text
 */
function ftsearch_strip_html($text)
{
    $text = str_replace(["\n", "\r"], ' ', $text);

    $withoutBreaks = preg_replace('#<br\b[^>]*>#i', ' ', $text);
    if ($withoutBreaks === null) {
        // regex engine error: fall back to the exact spellings
        $withoutBreaks = str_replace(['<br />', '<br/>', '<br>'], ' ', $text);
    }

    $plain = strip_tags($withoutBreaks);

    return html_entity_decode($plain, ENT_COMPAT, 'UTF-8');
}
570