Passed
Push — dev ( 824cd4...d410ef )
by Greg
12:51
created

GedcomImportService::importRecord()   F

Complexity

Conditions 29
Paths 578

Size

Total Lines 196
Code Lines 131

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 29
eloc 131
nc 578
nop 3
dl 0
loc 196
rs 0.4688
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2021 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Services;
21
22
use Fisharebest\Webtrees\Date;
23
use Fisharebest\Webtrees\Elements\UnknownElement;
24
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
25
use Fisharebest\Webtrees\Family;
26
use Fisharebest\Webtrees\Gedcom;
27
use Fisharebest\Webtrees\Header;
28
use Fisharebest\Webtrees\Individual;
29
use Fisharebest\Webtrees\Location;
30
use Fisharebest\Webtrees\Media;
31
use Fisharebest\Webtrees\Note;
32
use Fisharebest\Webtrees\Place;
33
use Fisharebest\Webtrees\PlaceLocation;
34
use Fisharebest\Webtrees\Registry;
35
use Fisharebest\Webtrees\Repository;
36
use Fisharebest\Webtrees\Soundex;
37
use Fisharebest\Webtrees\Source;
38
use Fisharebest\Webtrees\Submission;
39
use Fisharebest\Webtrees\Submitter;
40
use Fisharebest\Webtrees\Tree;
41
use Illuminate\Database\Capsule\Manager as DB;
42
use Illuminate\Database\Query\JoinClause;
43
44
use function app;
45
use function array_chunk;
46
use function array_intersect_key;
47
use function array_map;
48
use function array_unique;
49
use function assert;
50
use function date;
51
use function explode;
52
use function max;
53
use function mb_substr;
54
use function preg_match;
55
use function preg_match_all;
56
use function preg_replace;
57
use function round;
58
use function str_contains;
59
use function str_replace;
60
use function str_starts_with;
61
use function strlen;
62
use function strtolower;
63
use function strtoupper;
64
use function strtr;
65
use function substr;
66
use function trim;
67
68
use const PREG_SET_ORDER;
69
70
/**
71
 * Class GedcomImportService - import GEDCOM data
72
 */
73
class GedcomImportService
74
{
75
    /**
     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
     *
     * The record is split into lines of: level + optional xref + tag + optional data.
     * Each tag is canonicalised, tag-specific values are normalised (dates, places,
     * upper/lower case of coded values, etc.), CONC lines are merged into the line
     * that precedes them, and the lines are joined back together using "\n".
     *
     * @param string $rec  The raw record
     * @param Tree   $tree Supplies the GEDCOM_MEDIA_PATH and WORD_WRAPPED_NOTES preferences
     *
     * @return string The reformatted record
     */
    private function reformatRecord(string $rec, Tree $tree): string
    {
        $gedcom_service = app(GedcomService::class);
        assert($gedcom_service instanceof GedcomService);

        // Strip out mac/msdos line endings (runs of CR/LF become a single LF)
        $rec = preg_replace("/[\r\n]+/", "\n", $rec);

        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

        // Process the record line-by-line
        $newrec = '';
        foreach ($matches as $n => $match) {
            [, $level, $xref, $tag, $data] = $match;

            $tag = $gedcom_service->canonicalTag($tag);

            switch ($tag) {
                case 'AFN':
                case 'SEX':
                case 'TEMP':
                    // AFN values, SEX values and temple codes are upper case
                    $data = strtoupper($data);
                    break;
                case 'DATE':
                    $data = $this->normaliseDateValue($data);
                    break;
                case '_FILE':
                    $tag = 'FILE';
                    break;
                case 'FORM':
                    // Consistent commas
                    $data = preg_replace('/ *, */', ', ', $data);
                    break;
                case 'HEAD':
                case 'TRLR':
                    // HEAD and TRLR records don't have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
                case 'NAME':
                    // Remove leading/trailing whitespace and collapse runs of spaces
                    $data = preg_replace('/  +/', ' ', trim($data));
                    break;
                case 'PEDI':
                    // PEDI values are lower case
                    $data = strtolower($data);
                    break;
                case 'PLAC':
                    // Consistent commas
                    $data = preg_replace('/ *[,,،] */u', ', ', $data);
                    // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
                    // Use a dedicated variable, so that we do not overwrite the loop variable $match.
                    if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $coords)) {
                        // Convert degrees/minutes/seconds to decimal degrees, in a subordinate MAP structure.
                        $data =
                            $coords[1] . "\n" .
                            ($level + 1) . " MAP\n" .
                            ($level + 2) . ' LATI ' . ($coords[5] . round($coords[2] + ($coords[3] / 60) + ($coords[4] / 3600), 4)) . "\n" .
                            ($level + 2) . ' LONG ' . ($coords[9] . round($coords[6] + ($coords[7] / 60) + ($coords[8] / 3600), 4));
                    }
                    break;
                case 'RESN':
                    // RESN values are lower case (confidential, privacy, locked, none)
                    $data = strtolower($data);
                    if ($data === 'invisible') {
                        $data = 'confidential'; // From old versions of Legacy.
                    }
                    break;
                case 'STAT':
                    if ($data === 'CANCELLED') {
                        // PhpGedView mis-spells this tag - correct it.
                        $data = 'CANCELED';
                    }
                    break;
            }

            // Suppress "Y", for facts/events with a DATE or PLAC
            if ($data === 'y') {
                $data = 'Y';
            }
            if ($level === '1' && $data === 'Y') {
                // Look at every line up to the next level 1 line. The bound must be
                // $num_matches (not $num_matches - 1), otherwise a DATE/PLAC on the
                // final line of the record is never examined.
                for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                    if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
                        $data = '';
                        break;
                    }
                }
            }

            if ($tag === 'CONC') {
                // Merge CONC lines into the preceding line, to simplify access later on.
                $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
                continue;
            }

            // Final, tag-dependent clean-up of the data.
            switch ($tag) {
                case 'NOTE':
                case 'TEXT':
                case 'DATA':
                case 'CONT':
                    // The data for these tags is left exactly as it was supplied.
                    break;
                case 'FILE':
                    // Strip off the user-defined path prefix
                    $media_path = $tree->getPreference('GEDCOM_MEDIA_PATH');
                    if ($media_path !== '' && str_starts_with($data, $media_path)) {
                        $data = substr($data, strlen($media_path));
                    }
                    // convert backslashes in filenames to forward slashes
                    $data = preg_replace("/\\\\/", '/', $data);
                    break;
                default:
                    // Remove tabs and multiple/leading/trailing spaces
                    $data = strtr($data, ["\t" => ' ']);
                    $data = trim($data, ' ');
                    while (str_contains($data, '  ')) {
                        $data = strtr($data, ['  ' => ' ']);
                    }
                    break;
            }

            // Reassemble components back into a single line.
            // Only level 0 lines keep their XREF, and an empty NOTE keeps its trailing space.
            $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
        }

        return $newrec;
    }

    /**
     * Tidy up the value of a DATE line.
     *
     * Anything from the first "(" onwards (the text of an INT date) is kept unchanged.
     * The part before it is upper-cased and rewritten using the rules below. The
     * result may have leading/trailing/repeated spaces; the caller removes these.
     *
     * @param string $data The data from a DATE line
     *
     * @return string
     */
    private function normaliseDateValue(string $data): string
    {
        // Preserve text from INT dates
        if (str_contains($data, '(')) {
            [$date, $text] = explode('(', $data, 2);
            $text = ' (' . $text;
        } else {
            $date = $data;
            $text = '';
        }

        // Capitals
        $date = strtoupper($date);
        // Temporarily add leading/trailing spaces, to allow efficient matching below
        $date = ' ' . $date . ' ';
        // Ensure space between digits and letters
        $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
        $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
        // Ensure space before/after calendar escapes
        $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
        // "BET." => "BET"
        $date = preg_replace('/(\w\w)\./', '$1', $date);
        // "CIR" => "ABT"
        $date = str_replace(' CIR ', ' ABT ', $date);
        $date = str_replace(' APX ', ' ABT ', $date);
        // B.C. => BC (temporarily, to allow easier handling of ".")
        $date = str_replace(' B.C. ', ' BC ', $date);
        // TMG uses "EITHER X OR Y"
        $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
        // "BET X - Y " => "BET X AND Y"
        $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
        $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
        // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
        $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
        $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
        // "@#ESC@ AFT X" => "AFT @#ESC@ X"
        $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
        // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
        // (don't change "/" - it is used in NS/OS dates)
        $date = preg_replace('/[.,:;-]/', ' ', $date);
        // BC => B.C.
        $date = str_replace(' BC ', ' B.C. ', $date);

        // Append the "INT" text
        return $date . $text;
    }
260
261
    /**
     * Import a single gedcom record into the database.
     *
     * The record is reformatted, optionally given a _UID, optionally has its
     * previously linked media objects re-attached, and has inline media converted
     * into media objects. It is then stored in the table for its record type, and
     * the index tables (places, dates, names, links) are updated.
     *
     * _PLAC and _PLAC_DEFN records are only used to update place locations; they
     * are not stored and do not create links.
     *
     * @param string $gedrec the raw gedcom record to parse
     * @param Tree   $tree   import the record into this tree
     * @param bool   $update whether this is an updated record that has been accepted
     *
     * @return void
     * @throws GedcomErrorException when no XREF or known XREF-less record type can be found
     */
    public function importRecord(string $gedrec, Tree $tree, bool $update): void
    {
        $tree_id = $tree->id();

        // Escaped @ signs (only if importing from file)
        if ($update === false) {
            $gedrec = str_replace('@@', '@', $gedrec);
        }

        // Standardise gedcom format
        $gedrec = $this->reformatRecord($gedrec, $tree);

        // Work out the XREF and type of the record
        $xref_pattern = '/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/';

        if (preg_match($xref_pattern, $gedrec, $match) === 1) {
            $xref = $match[1];
            $type = $match[2];
        } elseif (preg_match('/0 (HEAD|TRLR|_PLAC |_PLAC_DEFN)/', $gedrec, $match) === 1) {
            // For records without an XREF, use the type as a pseudo XREF.
            $type = $match[1];
            $xref = $type;
        } else {
            throw new GedcomErrorException($gedrec);
        }

        // Add a _UID
        $generate_uids = $tree->getPreference('GENERATE_UIDS') === '1';

        if ($generate_uids && !str_contains($gedrec, "\n1 _UID ")) {
            $element = Registry::elementFactory()->make($type . ':_UID');

            if (!$element instanceof UnknownElement) {
                $gedrec .= "\n1 _UID " . $element->default($tree);
            }
        }

        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
        // using an application which does not support (and deletes) media objects, then add them
        // back in.
        if ($tree->getPreference('keep_media')) {
            // A fresh query for the existing media links of this record.
            $media_links = static function () use ($xref, $tree_id) {
                return DB::table('link')
                    ->where('l_from', '=', $xref)
                    ->where('l_file', '=', $tree_id)
                    ->where('l_type', '=', 'OBJE');
            };

            $old_linked_media = $media_links()->pluck('l_to');

            // Delete these links - so that we do not insert them again in updateLinks()
            $media_links()->delete();

            foreach ($old_linked_media as $media_id) {
                $gedrec .= "\n1 OBJE @" . $media_id . '@';
            }
        }

        // Convert inline media into media objects
        $gedrec = $this->convertInlineMedia($tree, $gedrec);

        switch ($type) {
            case Individual::RECORD_TYPE:
                $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

                // Fall back to the XREF when there is no RIN
                $rin = preg_match('/\n1 RIN (.+)/', $gedrec, $match) ? $match[1] : $xref;

                DB::table('individuals')->insert([
                    'i_id'     => $xref,
                    'i_file'   => $tree_id,
                    'i_rin'    => $rin,
                    'i_sex'    => $record->sex(),
                    'i_gedcom' => $gedrec,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                $this->updateNames($xref, $tree_id, $record);
                break;

            case Family::RECORD_TYPE:
                $husb = preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match) ? $match[1] : '';
                $wife = preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match) ? $match[1] : '';

                // Use the larger of the number of CHIL links and the NCHI value
                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);

                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
                    $nchi = max($nchi, $match[1]);
                }

                DB::table('families')->insert([
                    'f_id'      => $xref,
                    'f_file'    => $tree_id,
                    'f_husb'    => $husb,
                    'f_wife'    => $wife,
                    'f_gedcom'  => $gedrec,
                    'f_numchil' => $nchi,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                break;

            case Source::RECORD_TYPE:
                // Prefer the title, then the abbreviation, then the XREF
                $name = $xref;

                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match) || preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                }

                DB::table('sources')->insert([
                    's_id'     => $xref,
                    's_file'   => $tree_id,
                    's_name'   => mb_substr($name, 0, 255),
                    's_gedcom' => $gedrec,
                ]);
                break;

            case Repository::RECORD_TYPE:
            case Note::RECORD_TYPE:
            case Submission::RECORD_TYPE:
            case Submitter::RECORD_TYPE:
            case Location::RECORD_TYPE:
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => $type,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Header::RECORD_TYPE:
                // Force HEAD records to have a creation date.
                if (!str_contains($gedrec, "\n1 DATE ")) {
                    $gedrec .= "\n1 DATE " . strtoupper(date('d M Y'));
                }

                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => Header::RECORD_TYPE,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Media::RECORD_TYPE:
                $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

                DB::table('media')->insert([
                    'm_id'     => $xref,
                    'm_file'   => $tree_id,
                    'm_gedcom' => $gedrec,
                ]);

                // One row for each file of the media object; values are truncated to the lengths used here.
                foreach ($record->mediaFiles() as $media_file) {
                    DB::table('media_file')->insert([
                        'm_id'                 => $xref,
                        'm_file'               => $tree_id,
                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                    ]);
                }
                break;

            case '_PLAC ':
                // Not stored as a record, so there are no links to update.
                $this->importTNGPlac($gedrec);

                return;

            case '_PLAC_DEFN':
                // Not stored as a record, so there are no links to update.
                $this->importLegacyPlacDefn($gedrec);

                return;

            default: // Custom record types.
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => mb_substr($type, 0, 15),
                    'o_gedcom' => $gedrec,
                ]);
                break;
        }

        // Update the cross-reference/index tables.
        $this->updateLinks($xref, $tree_id, $gedrec);
    }
469
470
    /**
     * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values
     *
     * The place name is read from the level 1 PLAC line, and the coordinates from
     * the level 3 LATI/LONG lines. Nothing happens unless all three are present.
     * The coordinates are only stored when the place location has neither a
     * latitude nor a longitude.
     *
     * @param string $gedcom
     *
     * @return void
     */
    private function importLegacyPlacDefn(string $gedcom): void
    {
        $gedcom_service = new GedcomService();

        if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match) !== 1) {
            return;
        }

        $place_name = $match[1];

        if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match) !== 1) {
            return;
        }

        $latitude = $gedcom_service->readLatitude($match[1]);

        if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match) !== 1) {
            return;
        }

        $longitude = $gedcom_service->readLongitude($match[1]);

        $location = new PlaceLocation($place_name);

        // Do not overwrite existing coordinates.
        if ($location->latitude() !== null || $location->longitude() !== null) {
            return;
        }

        DB::table('place_location')
            ->where('id', '=', $location->id())
            ->update([
                'latitude'  => $latitude,
                'longitude' => $longitude,
            ]);
    }
508
509
    /**
     * Import a level 0 _PLAC record containing LAT/LONG values.
     *
     * The place name is read from the level 0 _PLAC line, and the coordinates
     * (as decimal numbers) from the level 2 LATI/LONG lines. Nothing happens unless
     * all three are present. The coordinates are only stored when the place
     * location has neither a latitude nor a longitude.
     *
     * NOTE(review): the method name suggests these records come from TNG - confirm.
     *
     * @param string $gedcom
     *
     * @return void
     */
    private function importTNGPlac(string $gedcom): void
    {
        if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match) !== 1) {
            return;
        }

        $place_name = $match[1];

        if (preg_match('/\n2 LATI (.+)/', $gedcom, $match) !== 1) {
            return;
        }

        $latitude = (float) $match[1];

        if (preg_match('/\n2 LONG (.+)/', $gedcom, $match) !== 1) {
            return;
        }

        $longitude = (float) $match[1];

        $location = new PlaceLocation($place_name);

        // Do not overwrite existing coordinates.
        if ($location->latitude() !== null || $location->longitude() !== null) {
            return;
        }

        DB::table('place_location')
            ->where('id', '=', $location->id())
            ->update([
                'latitude'  => $latitude,
                'longitude' => $longitude,
            ]);
    }
545
546
    /**
     * Extract all level 2 places from the given record and insert them into the places table
     *
     * Each distinct place is linked to the record, together with each of its
     * parent places.
     *
     * @param string $xref
     * @param Tree   $tree
     * @param string $gedrec
     *
     * @return void
     */
    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
    {
        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);

        // Collect the rows, so that we can insert them together
        $rows = [];

        foreach (array_unique($matches[1]) as $place_name) {
            // Walk up the hierarchy, from the place itself to its top-level parent.
            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
            for ($place = new Place($place_name, $tree); $place->id() !== 0; $place = $place->parent()) {
                $rows[] = [
                    'pl_p_id' => $place->id(),
                    'pl_gid'  => $xref,
                    'pl_file' => $tree->id(),
                ];
            }
        }

        // Remove duplicate rows. array_unique doesn't work with arrays of arrays,
        // so compare the serialized form of each row instead.
        $serialized_rows = array_map('serialize', $rows);
        $distinct_rows   = array_intersect_key($rows, array_unique($serialized_rows));

        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
        $chunks = array_chunk($distinct_rows, 20000);

        foreach ($chunks as $chunk) {
            DB::table('placelinks')->insert($chunk);
        }
    }
587
588
    /**
     * Extract all the dates from the given record and insert them into the database.
     *
     * For each level 1 fact that has a level 2 DATE, two rows are created: one for
     * the minimum date and one for the maximum date. Duplicate rows are removed
     * before they are inserted.
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateDates(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            $fact = $match[1];
            $date = new Date($match[2]);

            // Every column of a row - including d_type - describes the same date.
            // (Previously, the row for the maximum date took its d_type from the
            // minimum date, which is wrong for ranges that mix calendars.)
            foreach ([$date->minimumDate(), $date->maximumDate()] as $calendar_date) {
                $rows[] = [
                    'd_day'        => $calendar_date->day,
                    'd_month'      => $calendar_date->format('%O'),
                    'd_mon'        => $calendar_date->month,
                    'd_year'       => $calendar_date->year,
                    'd_julianday1' => $calendar_date->minimumJulianDay(),
                    'd_julianday2' => $calendar_date->maximumJulianDay(),
                    'd_fact'       => $fact,
                    'd_gid'        => $xref,
                    'd_file'       => $ged_id,
                    'd_type'       => $calendar_date->format('%@'),
                ];
            }
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        DB::table('dates')->insert($rows);
    }
639
640
    /**
     * Extract all the links from the given record and insert them into the database
     *
     * A link is any line (other than the first) whose data is an XREF. There is
     * one row for each combination of tag and target, where targets are compared
     * without regard to case.
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
    {
        $pattern = '/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/';

        preg_match_all($pattern, $gedrec, $links, PREG_SET_ORDER);

        // Collect the rows, so that we can insert them together
        $rows = [];

        foreach ($links as $link) {
            $tag    = $link[1];
            $target = $link[2];

            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
            $key = $tag . strtoupper($target);

            $rows[$key] = [
                'l_from' => $xref,
                'l_to'   => $target,
                'l_type' => $tag,
                'l_file' => $ged_id,
            ];
        }

        DB::table('link')->insert($rows);
    }
668
669
    /**
     * Extract all the names from the given record and insert them into the database.
     *
     * Soundex codes are stored as null for a given name or surname that is
     * marked as unknown. Text values are truncated to 255 characters.
     *
     * @param string     $xref
     * @param int        $ged_id
     * @param Individual $record
     *
     * @return void
     */
    private function updateNames(string $xref, int $ged_id, Individual $record): void
    {
        // Collect the rows, so that we can insert them together
        $rows = [];

        foreach ($record->getAllNames() as $n => $name) {
            $has_givn = $name['givn'] !== Individual::PRAENOMEN_NESCIO;
            $has_surn = $name['surn'] !== Individual::NOMEN_NESCIO;

            $soundex_givn_std = $has_givn ? Soundex::russell($name['givn']) : null;
            $soundex_givn_dm  = $has_givn ? Soundex::daitchMokotoff($name['givn']) : null;

            // The "surn" value decides whether the surname is known; the codes are built from "surname".
            $soundex_surn_std = $has_surn ? Soundex::russell($name['surname']) : null;
            $soundex_surn_dm  = $has_surn ? Soundex::daitchMokotoff($name['surname']) : null;

            $rows[] = [
                'n_file'             => $ged_id,
                'n_id'               => $xref,
                'n_num'              => $n,
                'n_type'             => $name['type'],
                'n_sort'             => mb_substr($name['sort'], 0, 255),
                'n_full'             => mb_substr($name['fullNN'], 0, 255),
                'n_surname'          => mb_substr($name['surname'], 0, 255),
                'n_surn'             => mb_substr($name['surn'], 0, 255),
                'n_givn'             => mb_substr($name['givn'], 0, 255),
                'n_soundex_givn_std' => $soundex_givn_std,
                'n_soundex_surn_std' => $soundex_surn_std,
                'n_soundex_givn_dm'  => $soundex_givn_dm,
                'n_soundex_surn_dm'  => $soundex_surn_dm,
            ];
        }

        DB::table('name')->insert($rows);
    }
719
720
    /**
     * Extract inline media data, and convert to media objects.
     *
     * An inline media object is an OBJE line (at level 1, 2 or 3) that is followed
     * by subordinate lines. Each one is replaced by a link to a newly created
     * media object. Level 1 objects are converted first, then level 2, then level 3.
     *
     * @param Tree   $tree
     * @param string $gedcom
     *
     * @return string The record, with links in place of the inline media objects
     */
    private function convertInlineMedia(Tree $tree, string $gedcom): string
    {
        // For each level: an OBJE line with no data, plus all of its subordinate lines.
        $patterns = [
            1 => '/\n1 OBJE(?:\n[2-9].+)+/',
            2 => '/\n2 OBJE(?:\n[3-9].+)+/',
            3 => '/\n3 OBJE(?:\n[4-9].+)+/',
        ];

        foreach ($patterns as $level => $pattern) {
            while (preg_match($pattern, $gedcom, $match) === 1) {
                $inline = $match[0];
                $xref   = $this->createMediaObject($inline, $tree);
                $link   = "\n" . $level . ' OBJE @' . $xref . '@';
                $gedcom = strtr($gedcom, [$inline => $link]);
            }
        }

        return $gedcom;
    }
745
746
    /**
     * Create a new media object, from inline media data.
     *
     * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL
     * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL
     * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies:  +1 FILE / +1 FORM / +2 MEDI
     *
     * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE
     * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL
     *
     * If a media object with the same (truncated) title and filename already
     * exists in this tree, its XREF is re-used and nothing is created.
     *
     * @param string $gedcom The inline OBJE structure, starting with a newline
     * @param Tree   $tree
     *
     * @return string The XREF of the existing or newly created media object
     */
    private function createMediaObject(string $gedcom, Tree $tree): string
    {
        // Value of the first line with the given tag, at any level.  Empty string if there is none.
        $extract = static function (string $tag) use ($gedcom): string {
            preg_match('/\n\d ' . $tag . ' (.+)/', $gedcom, $match);

            return $match[1] ?? '';
        };

        $file        = $extract('FILE');
        $title       = $extract('TITL');
        $format      = $extract('FORM');
        $media       = $extract('MEDI');
        $scrapbook   = $extract('_SCBK');
        $primary     = $extract('_PRIM');
        $custom_type = $extract('_TYPE');

        if ($media === '') {
            // Legacy uses _TYPE instead of MEDI
            $media = $custom_type;
            $type  = '';
        } else {
            $type = $custom_type;
        }

        // Each note includes its continuation lines.
        preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches);
        $notes = $matches[1] ?? [];

        // Have we already created a media object with the same title/filename?
        $xref = DB::table('media_file')
            ->where('m_file', '=', $tree->id())
            ->where('descriptive_title', '=', mb_substr($title, 0, 248))
            ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248))
            ->value('m_id');

        if ($xref === null) {
            $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE);

            // convert to a media-object
            $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file;

            if ($format !== '') {
                $gedcom .= "\n2 FORM " . $format;

                if ($media !== '') {
                    $gedcom .= "\n3 TYPE " . $media;
                }
            }

            if ($title !== '') {
                // In a media record, TITL is subordinate to FILE (level 2).  Writing it
                // at level 3 would skip a level whenever there is no FORM line.
                $gedcom .= "\n2 TITL " . $title;
            }

            if ($scrapbook !== '') {
                $gedcom .= "\n1 _SCBK " . $scrapbook;
            }

            if ($primary !== '') {
                $gedcom .= "\n1 _PRIM " . $primary;
            }

            if ($type !== '') {
                $gedcom .= "\n1 _TYPE " . $type;
            }

            foreach ($notes as $note) {
                // The note moves to level 1, so its continuation lines move to level 2.
                $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]);
            }

            DB::table('media')->insert([
                'm_id'     => $xref,
                'm_file'   => $tree->id(),
                'm_gedcom' => $gedcom,
            ]);

            // Values are truncated in the same way as in the duplicate check above.
            DB::table('media_file')->insert([
                'm_id'                 => $xref,
                'm_file'               => $tree->id(),
                'multimedia_file_refn' => mb_substr($file, 0, 248),
                'multimedia_format'    => mb_substr($format, 0, 4),
                'source_media_type'    => mb_substr($media, 0, 15),
                'descriptive_title'    => mb_substr($title, 0, 248),
            ]);
        }

        return $xref;
    }
852
853
    /**
     * Update a record in the database.
     *
     * All rows derived from the record (place links, dates, names, links and
     * the row(s) in the table for its record type) are deleted.  Unless the
     * record itself is being deleted, it is then imported again.
     *
     * @param string $gedrec The raw GEDCOM record
     * @param Tree   $tree
     * @param bool   $delete True to only remove the record, false to re-import it
     *
     * @return void
     * @throws GedcomErrorException If the record has neither "0 @XREF@ TAG" nor "0 HEAD" as its first line
     */
    public function updateRecord(string $gedrec, Tree $tree, bool $delete): void
    {
        $xref_pattern = '/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/';

        if (preg_match($xref_pattern, $gedrec, $match)) {
            $gid  = $match[1];
            $type = $match[2];
        } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) {
            // The HEAD record has no XREF.  Any others?
            $gid  = $match[1];
            $type = $match[1];
        } else {
            throw new GedcomErrorException($gedrec);
        }

        $tree_id = $tree->id();

        // Delete this record's rows from one table.
        $purge = static function (string $table, string $id_column, string $file_column) use ($gid, $tree_id): void {
            DB::table($table)
                ->where($id_column, '=', $gid)
                ->where($file_column, '=', $tree_id)
                ->delete();
        };

        // Place links
        $purge('placelinks', 'pl_gid', 'pl_file');

        // Orphaned places.  If we're deleting  "Westminster, London, England",
        // then we may also need to delete "London, England" and "England".
        $match_place_to_link = static function (JoinClause $join): void {
            $join
                ->on('p_id', '=', 'pl_p_id')
                ->on('p_file', '=', 'pl_file');
        };

        // Repeat until a pass deletes nothing.
        do {
            $deleted = DB::table('places')
                ->leftJoin('placelinks', $match_place_to_link)
                ->whereNull('pl_p_id')
                ->delete();
        } while ($deleted > 0);

        $purge('dates', 'd_gid', 'd_file');
        $purge('name', 'n_id', 'n_file');
        $purge('link', 'l_from', 'l_file');

        // Tables (with their XREF and tree columns) that hold each record type.
        $tables_by_type = [
            Individual::RECORD_TYPE => [
                ['individuals', 'i_id', 'i_file'],
            ],
            Family::RECORD_TYPE     => [
                ['families', 'f_id', 'f_file'],
            ],
            Source::RECORD_TYPE     => [
                ['sources', 's_id', 's_file'],
            ],
            Media::RECORD_TYPE      => [
                ['media_file', 'm_id', 'm_file'],
                ['media', 'm_id', 'm_file'],
            ],
        ];

        // Every other record type is stored in the "other" table.
        $tables = $tables_by_type[$type] ?? [['other', 'o_id', 'o_file']];

        foreach ($tables as [$table, $id_column, $file_column]) {
            $purge($table, $id_column, $file_column);
        }

        if ($delete) {
            return;
        }

        $this->importRecord($gedrec, $tree, true);
    }
955
}
956