Passed
Push — dbal ( 545eb7...a65111 )
by Greg
13:50 queued 06:40
created

FixSearchAndReplace::recordQuery()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 24
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 16
nc 5
nop 3
dl 0
loc 24
rs 9.4222
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2023 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Module;
21
22
use Fisharebest\Webtrees\DB;
23
use Fisharebest\Webtrees\Family;
24
use Fisharebest\Webtrees\GedcomRecord;
25
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
26
use Fisharebest\Webtrees\I18N;
27
use Fisharebest\Webtrees\Individual;
28
use Fisharebest\Webtrees\Location;
29
use Fisharebest\Webtrees\Media;
30
use Fisharebest\Webtrees\Note;
31
use Fisharebest\Webtrees\Repository;
32
use Fisharebest\Webtrees\Services\DataFixService;
33
use Fisharebest\Webtrees\Source;
34
use Fisharebest\Webtrees\Submitter;
35
use Fisharebest\Webtrees\Tree;
36
use Illuminate\Database\Query\Builder;
37
use Illuminate\Support\Collection;
38
use Throwable;
39
40
use function addcslashes;
41
use function asort;
42
use function preg_match;
43
use function preg_quote;
44
use function preg_replace;
45
use function view;
46
47
/**
48
 * A data-fix module that searches for, and replaces, text in the GEDCOM data of records.
49
 */
50
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
51
{
52
    use ModuleDataFixTrait;
53
54
    // A regular expression that never matches.
55
    private const INVALID_REGEX = '/(?!)/';
56
57
    /**
     * @param DataFixService $data_fix_service Kept as a private property; previewUpdate() uses it to render differences.
     */
    public function __construct(private DataFixService $data_fix_service)
    {
    }
66
67
    /**
     * The name under which this module is listed, e.g. in the control panel.
     *
     * @return string The translated module name.
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        $title = I18N::translate('Search and replace');

        return $title;
    }
77
78
    /**
     * A one-sentence summary of what this module does.
     *
     * @return string The translated description.
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        $description = I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');

        return $description;
    }
88
89
    /**
     * Render the options form for this data fix.
     *
     * The form offers four search methods ("exact" is preselected) and a choice
     * of record type (individuals are preselected).  The record types are
     * ordered by their translated labels.
     *
     * @param Tree $tree Not used by this implementation.
     *
     * @return string The rendered "modules/fix-search-and-replace/options" view.
     */
    public function fixOptions(Tree $tree): string
    {
        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by label while keeping the record-type keys attached to their labels.
        asort($types);

        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $view_data = [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ];

        return view('modules/fix-search-and-replace/options', $view_data);
    }
126
127
    /**
     * Select the families that are candidates for this search.
     *
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Only records from this tree are selected.
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The f_id values of the candidates, or null when
     *                                     families were not requested or the search text is empty.
     */
    protected function familiesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Family::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = DB::table('families')
            ->where('f_file', '=', $tree->id());

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'f_gedcom', $params);

        return $candidates->pluck('f_id');
    }
147
148
    /**
     * Select the individuals that are candidates for this search.
     *
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Only records from this tree are selected.
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The i_id values of the candidates, or null when
     *                                     individuals were not requested or the search text is empty.
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Individual::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = DB::table('individuals')->where('i_file', '=', $tree->id());

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'i_gedcom', $params);

        return $candidates->pluck('i_id');
    }
170
171
    /**
     * Select the location records that are candidates for this search.
     *
     * Locations are read from the "other" table, filtered by record type.
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Only records from this tree are selected.
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The o_id values of the candidates, or null when
     *                                     locations were not requested or the search text is empty.
     */
    protected function locationsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Location::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'o_gedcom', $params);

        return $candidates->pluck('o_id');
    }
194
195
    /**
     * Select the media objects that are candidates for this search.
     *
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Only records from this tree are selected.
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The m_id values of the candidates, or null when
     *                                     media objects were not requested or the search text is empty.
     */
    protected function mediaToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Media::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = DB::table('media')->where('m_file', '=', $tree->id());

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'm_gedcom', $params);

        return $candidates->pluck('m_id');
    }
217
218
    /**
     * Select the note records that are candidates for this search.
     *
     * Notes are read from the "other" table, filtered by record type.
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Only records from this tree are selected.
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The o_id values of the candidates, or null when
     *                                     notes were not requested or the search text is empty.
     */
    protected function notesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Note::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'o_gedcom', $params);

        return $candidates->pluck('o_id');
    }
241
242
    /**
     * Select the repository records that are candidates for this search.
     *
     * Repositories are read from the "other" table, filtered by record type.
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Only records from this tree are selected.
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The o_id values of the candidates, or null when
     *                                     repositories were not requested or the search text is empty.
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Repository::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'o_gedcom', $params);

        return $candidates->pluck('o_id');
    }
265
266
    /**
     * Select the sources that are candidates for this search.
     *
     * The base query comes from sourcesToFixQuery(), which is not defined in
     * this file (presumably it is provided by ModuleDataFixTrait - confirm).
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Passed on to sourcesToFixQuery().
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The s_id values of the candidates, or null when
     *                                     sources were not requested or the search text is empty.
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Source::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = $this->sourcesToFixQuery($tree, $params);

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 's_gedcom', $params);

        return $candidates->pluck('s_id');
    }
287
288
    /**
     * Select the submitters that are candidates for this search.
     *
     * The base query comes from submittersToFixQuery(), which is not defined
     * in this file (presumably it is provided by ModuleDataFixTrait - confirm).
     * The SQL filter is only a coarse first pass, so the result may include
     * records that turn out not to need updating.
     *
     * @param Tree                 $tree   Passed on to submittersToFixQuery().
     * @param array<string,string> $params Uses the "type", "search-for" and "method" entries.
     *
     * @return Collection<int,string>|null The o_id values of the candidates, or null when
     *                                     submitters were not requested or the search text is empty.
     */
    protected function submittersToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Submitter::RECORD_TYPE) {
            return null;
        }

        if ($params['search-for'] === '') {
            return null;
        }

        $candidates = $this->submittersToFixQuery($tree, $params);

        // Narrow the query to rows whose GEDCOM text could match the search.
        $this->recordQuery($candidates, 'o_gedcom', $params);

        return $candidates->pluck('o_id');
    }
309
310
    /**
     * Decide whether the search pattern occurs in a record's GEDCOM data.
     *
     * @param GedcomRecord         $record The record to examine.
     * @param array<string,string> $params The search options, as used by createRegex().
     *
     * @return bool True only when the pattern matches.
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        $regex  = $this->createRegex($params);
        $result = preg_match($regex, $record->gedcom());

        // preg_match() gives 1 for a match, 0 for no match and false on failure.
        return $result === 1;
    }
322
323
    /**
     * Describe the changes that updateRecord() would make, without making them.
     *
     * @param GedcomRecord         $record The record to examine.
     * @param array<string,string> $params The search and replace options.
     *
     * @return string The difference between the current and the updated GEDCOM,
     *                as produced by DataFixService::gedcomDiff().
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $before = $record->gedcom();
        $after  = $this->updateGedcom($record, $params);
        $tree   = $record->tree();

        return $this->data_fix_service->gedcomDiff($tree, $before, $after);
    }
338
339
    /**
     * Apply the search and replace to a record and store the result.
     *
     * @param GedcomRecord         $record The record to change.
     * @param array<string,string> $params The search and replace options.
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $gedcom = $this->updateGedcom($record, $params);

        // NOTE(review): the meaning of the second argument (false) is defined by
        // GedcomRecord::updateRecord(), which is not visible here - confirm.
        $record->updateRecord($gedcom, false);
    }
351
352
    /**
     * Apply the search and replace to a record's GEDCOM data, without saving it.
     *
     * @param GedcomRecord         $record The record whose GEDCOM data is used.
     * @param array<string,string> $params The search options used by createRegex(),
     *                                     plus the "replace-with" text.
     *
     * @return string The GEDCOM data after replacement.  If the replacement itself
     *                fails, the original GEDCOM data is returned unchanged.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace-with'], ['\n' => "\n"]);

        $regex  = $this->createRegex($params);
        $gedcom = $record->gedcom();

        // preg_replace() returns null when PCRE fails (for example, when a limit
        // is exceeded).  Returning null would violate the declared return type,
        // so treat a failure as "no change" - the same outcome that
        // doesRecordNeedUpdate() gives when its preg_match() call fails.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }
368
369
    /**
     * Create a PCRE regular expression from the search options.
     *
     * Every pattern gets the "u" modifier, followed by the "case" option, which
     * is appended verbatim as further modifiers (presumably "i" or an empty
     * string - confirm against the options form).
     *
     * - exact:     the search text, taken literally.
     * - words:     the search text, taken literally, between word boundaries.
     * - wildcards: as "words", but "*" matches any run of characters and "?" matches one.
     * - regex:     the search text is used as the pattern; if it does not compile,
     *              a pattern that never matches is returned instead.
     *
     * @param array<string,string> $params Uses the "search-for", "method" and "case" entries.
     *
     * @return string
     *
     * @throws HttpNotFoundException When the method is not one of the four above.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search-for'];
        $method = $params['method'];
        $case   = $params['case'];

        switch ($method) {
            case 'exact':
                return '/' . preg_quote($search, '/') . '/u' . $case;

            case 'words':
                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;

            case 'wildcards':
                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;

            case 'regex':
                $regex = '/' . addcslashes($search, '/') . '/u' . $case;

                try {
                    // A valid regex on an empty string returns zero.
                    // An invalid regex on an empty string returns false and raises a warning.
                    // The warning only becomes an exception when an error handler converts it,
                    // so the return value must be checked as well.
                    $is_valid = preg_match($regex, '') !== false;
                } catch (Throwable) {
                    $is_valid = false;
                }

                return $is_valid ? $regex : self::INVALID_REGEX;
        }

        throw new HttpNotFoundException();
    }
408
409
    /**
     * Add a condition to a query, so that it only selects rows whose GEDCOM
     * column could match the search.
     *
     * The "exact", "words" and "wildcards" methods use a LIKE comparison, and
     * the "regex" method uses the database's regular expression operator.
     * Any other method leaves the query unchanged.
     *
     * @param Builder              $query  The query to restrict; it is modified in place.
     * @param string               $column The column that holds the GEDCOM text.
     * @param array<string,string> $params Uses the "search-for" and "method" entries.
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search-for'];
        $method = $params['method'];

        // Escape the characters that are special to LIKE, and allow any text on either side.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        $condition = match ($method) {
            'exact', 'words' => ['LIKE', $like],
            // Translate the user's wildcards into their LIKE equivalents.
            'wildcards'      => ['LIKE', strtr($like, ['?' => '_', '*' => '%'])],
            // Substituting newlines seems to be necessary on *some* versions
            // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
            'regex'          => [DB::regexOperator(), strtr($search, ['\n' => "\n"])],
            default          => null,
        };

        if ($condition !== null) {
            [$operator, $value] = $condition;

            $query->where($column, $operator, $value);
        }
    }
444
}
445