Passed
Push — master ( 506a57...22e73d )
by Greg
06:57
created

FunctionsImport::acceptAllChanges()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 25
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 17
nc 3
nop 2
dl 0
loc 25
rs 9.7
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2019 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Functions;
21
22
use Fisharebest\Webtrees\Date;
23
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
24
use Fisharebest\Webtrees\Gedcom;
25
use Fisharebest\Webtrees\GedcomRecord;
26
use Fisharebest\Webtrees\GedcomTag;
27
use Fisharebest\Webtrees\Individual;
28
use Fisharebest\Webtrees\Log;
29
use Fisharebest\Webtrees\Media;
30
use Fisharebest\Webtrees\Note;
31
use Fisharebest\Webtrees\Place;
32
use Fisharebest\Webtrees\Repository;
33
use Fisharebest\Webtrees\Soundex;
34
use Fisharebest\Webtrees\Source;
35
use Fisharebest\Webtrees\Tree;
36
use Illuminate\Database\Capsule\Manager as DB;
37
use Illuminate\Database\Query\JoinClause;
38
use PDOException;
39
40
/**
41
 * Class FunctionsImport - common functions
42
 */
43
class FunctionsImport
44
{
45
    /**
     * Legacy PhpGedView tags and FTM-style "TAG_FORMAL_NAME" tags, mapped to the
     * short tag that is written to the reformatted record.
     */
    private const TAG_ALIASES = [
        // Convert PhpGedView tags to WT
        '_PGVU'               => '_WT_USER',
        '_PGV_OBJS'           => '_WT_OBJE_SORT',
        // Convert FTM-style "TAG_FORMAL_NAME" into "TAG".
        'ABBREVIATION'        => 'ABBR',
        'ADDRESS'             => 'ADDR',
        'ADDRESS1'            => 'ADR1',
        'ADDRESS2'            => 'ADR2',
        'ADDRESS3'            => 'ADR3',
        'ADOPTION'            => 'ADOP',
        'ADULT_CHRISTENING'   => 'CHRA',
        'AGENCY'              => 'AGNC',
        'ALIAS'               => 'ALIA',
        'ANCESTORS'           => 'ANCE',
        'ANCES_INTEREST'      => 'ANCI',
        'ANNULMENT'           => 'ANUL',
        'ASSOCIATES'          => 'ASSO',
        'AUTHOR'              => 'AUTH',
        'BAPTISM'             => 'BAPM',
        'BAPTISM_LDS'         => 'BAPL',
        'BAR_MITZVAH'         => 'BARM',
        'BAS_MITZVAH'         => 'BASM',
        'BIRTH'               => 'BIRT',
        'BLESSING'            => 'BLES',
        'BURIAL'              => 'BURI',
        'CALL_NUMBER'         => 'CALN',
        'CASTE'               => 'CAST',
        'CAUSE'               => 'CAUS',
        'CENSUS'              => 'CENS',
        'CHANGE'              => 'CHAN',
        'CHARACTER'           => 'CHAR',
        'CHILD'               => 'CHIL',
        'CHILDREN_COUNT'      => 'NCHI',
        'CHRISTENING'         => 'CHR',
        'CONCATENATION'       => 'CONC',
        'CONFIRMATION'        => 'CONF',
        'CONFIRMATION_LDS'    => 'CONL',
        'CONTINUED'           => 'CONT',
        'COPYRIGHT'           => 'COPR',
        'CORPORATE'           => 'CORP',
        'COUNTRY'             => 'CTRY',
        'CREMATION'           => 'CREM',
        'DEATH'               => 'DEAT',
        '_DEATH_OF_SPOUSE'    => '_DETS',
        '_DEGREE'             => '_DEG',
        'DESCENDANTS'         => 'DESC',
        'DESCENDANT_INT'      => 'DESI',
        'DESTINATION'         => 'DEST',
        'DIVORCE'             => 'DIV',
        'DIVORCE_FILED'       => 'DIVF',
        'EDUCATION'           => 'EDUC',
        'EMIGRATION'          => 'EMIG',
        'ENDOWMENT'           => 'ENDL',
        'ENGAGEMENT'          => 'ENGA',
        'EVENT'               => 'EVEN',
        'FACSIMILE'           => 'FAX',
        'FAMILY'              => 'FAM',
        'FAMILY_CHILD'        => 'FAMC',
        'FAMILY_FILE'         => 'FAMF',
        'FAMILY_SPOUSE'       => 'FAMS',
        'FIRST_COMMUNION'     => 'FCOM',
        '_FILE'               => 'FILE',
        'FORMAT'              => 'FORM',
        'GEDCOM'              => 'GEDC',
        'GIVEN_NAME'          => 'GIVN',
        'GRADUATION'          => 'GRAD',
        'HEADER'              => 'HEAD',
        'HUSBAND'             => 'HUSB',
        'IDENT_NUMBER'        => 'IDNO',
        'IMMIGRATION'         => 'IMMI',
        'INDIVIDUAL'          => 'INDI',
        'LANGUAGE'            => 'LANG',
        'LATITUDE'            => 'LATI',
        'LONGITUDE'           => 'LONG',
        'MARRIAGE'            => 'MARR',
        'MARRIAGE_BANN'       => 'MARB',
        'MARRIAGE_COUNT'      => 'NMR',
        'MARRIAGE_CONTRACT'   => 'MARC',
        'MARRIAGE_LICENSE'    => 'MARL',
        'MARRIAGE_SETTLEMENT' => 'MARS',
        'MEDIA'               => 'MEDI',
        '_MEDICAL'            => '_MDCL',
        '_MILITARY_SERVICE'   => '_MILT',
        'NAME_PREFIX'         => 'NPFX',
        'NAME_SUFFIX'         => 'NSFX',
        'NATIONALITY'         => 'NATI',
        'NATURALIZATION'      => 'NATU',
        'NICKNAME'            => 'NICK',
        'OBJECT'              => 'OBJE',
        'OCCUPATION'          => 'OCCU',
        'ORDINANCE'           => 'ORDI',
        'ORDINATION'          => 'ORDN',
        'PEDIGREE'            => 'PEDI',
        'PHONE'               => 'PHON',
        'PHONETIC'            => 'FONE',
        'PHY_DESCRIPTION'     => 'DSCR',
        'PLACE'               => 'PLAC',
        'POSTAL_CODE'         => 'POST',
        'PROBATE'             => 'PROB',
        'PROPERTY'            => 'PROP',
        'PUBLICATION'         => 'PUBL',
        'QUALITY_OF_DATA'     => 'QUAL',
        'REC_FILE_NUMBER'     => 'RFN',
        'REC_ID_NUMBER'       => 'RIN',
        'REFERENCE'           => 'REFN',
        'RELATIONSHIP'        => 'RELA',
        'RELIGION'            => 'RELI',
        'REPOSITORY'          => 'REPO',
        'RESIDENCE'           => 'RESI',
        'RESTRICTION'         => 'RESN',
        'RETIREMENT'          => 'RETI',
        'ROMANIZED'           => 'ROMN',
        'SEALING_CHILD'       => 'SLGC',
        'SEALING_SPOUSE'      => 'SLGS',
        'SOC_SEC_NUMBER'      => 'SSN',
        'SOURCE'              => 'SOUR',
        'STATE'               => 'STAE',
        'STATUS'              => 'STAT',
        'SUBMISSION'          => 'SUBN',
        'SUBMITTER'           => 'SUBM',
        'SURNAME'             => 'SURN',
        'SURN_PREFIX'         => 'SPFX',
        'TEMPLE'              => 'TEMP',
        'TITLE'               => 'TITL',
        'TRAILER'             => 'TRLR',
        'VERSION'             => 'VERS',
        'WEB'                 => 'WWW',
    ];

    /**
     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
     *
     * Every line is split into level, optional xref, tag and optional data. Tags are
     * upper-cased and long-form aliases are replaced by their short tag, the data of
     * selected tags is normalised, and the lines are joined again with "\n".
     * CONC lines are merged into the text of the preceding line.
     *
     * @param string $rec
     * @param Tree   $tree
     *
     * @return string
     */
    public static function reformatRecord($rec, Tree $tree): string
    {
        // Strip out mac/msdos line endings
        $rec = preg_replace("/[\r\n]+/", "\n", $rec);

        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

        // Process the record line-by-line
        $newrec = '';
        foreach ($matches as $n => $match) {
            [, $level, $xref, $tag, $data] = $match;

            // Tags should always be upper case, and use the short form
            $tag = strtoupper($tag);
            $tag = self::TAG_ALIASES[$tag] ?? $tag;

            // Tag-specific clean-up of the data
            switch ($tag) {
                case 'AFN':
                case 'SEX':
                case 'TEMP':
                    // AFN values, SEX values and temple codes are upper case
                    $data = strtoupper($data);
                    break;
                case 'DATE':
                    $data = self::reformatDate($data);
                    break;
                case 'FORM':
                    // Consistent commas
                    $data = preg_replace('/ *, */', ', ', $data);
                    break;
                case 'HEAD':
                case 'TRLR':
                    // HEAD/TRLR records don't have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
                case 'NAME':
                    // Tidy up whitespace
                    $data = preg_replace('/  +/', ' ', trim($data));
                    break;
                case 'PEDI':
                    // PEDI values are lower case
                    $data = strtolower($data);
                    break;
                case 'PLAC':
                    $data = self::reformatPlace($level, $data);
                    break;
                case 'RESN':
                    // RESN values are lower case (confidential, privacy, locked, none)
                    $data = strtolower($data);
                    if ($data === 'invisible') {
                        $data = 'confidential'; // From old versions of Legacy.
                    }
                    break;
                case 'STAT':
                    if ($data === 'CANCELLED') {
                        // PhpGedView mis-spells this tag - correct it.
                        $data = 'CANCELED';
                    }
                    break;
            }

            // Suppress "Y", for facts/events with a DATE or PLAC
            if ($data === 'y') {
                $data = 'Y';
            }
            if ($level === '1' && $data === 'Y') {
                // Scan every subordinate line, including the last line of the record,
                // and stop at the next level 1 line.
                for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                    // Compare the tag in the same form in which it will be written out.
                    $sub_tag = strtoupper($matches[$i][3]);
                    $sub_tag = self::TAG_ALIASES[$sub_tag] ?? $sub_tag;
                    if ($sub_tag === 'DATE' || $sub_tag === 'PLAC') {
                        $data = '';
                        break;
                    }
                }
            }

            if ($tag === 'CONC') {
                // Merge CONC lines, to simplify access later on.
                $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
                continue;
            }

            switch ($tag) {
                case 'NOTE':
                case 'TEXT':
                case 'DATA':
                case 'CONT':
                    // Whitespace is kept as-is for these tags.
                    break;
                case 'FILE':
                    // Strip off the user-defined path prefix
                    $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
                    if ($GEDCOM_MEDIA_PATH && strpos($data, $GEDCOM_MEDIA_PATH) === 0) {
                        $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
                    }
                    // convert backslashes in filenames to forward slashes
                    $data = str_replace('\\', '/', $data);
                    break;
                default:
                    // Remove tabs and multiple/leading/trailing spaces
                    if (strpos($data, "\t") !== false) {
                        $data = str_replace("\t", ' ', $data);
                    }
                    if (substr($data, 0, 1) === ' ' || substr($data, -1, 1) === ' ') {
                        $data = trim($data);
                    }
                    while (strpos($data, '  ') !== false) {
                        $data = str_replace('  ', ' ', $data);
                    }
                    break;
            }

            // Reassemble components back into a single line
            $line = $level . ' ';
            if ($level === '0' && $xref) {
                $line .= $xref . ' ';
            }
            $line .= $tag;
            // An empty NOTE still gets its separating space.
            if ($data !== '' || $tag === 'NOTE') {
                $line .= ' ' . $data;
            }

            // Compare against '' rather than testing truthiness, so that text
            // consisting of "0" is not mistaken for an empty record.
            $newrec .= ($newrec === '' ? '' : "\n") . $line;
        }

        return $newrec;
    }

    /**
     * Normalise the value of a DATE line: upper case keywords, consistent spacing,
     * calendar escapes placed after the keyword, and stray punctuation removed.
     * Any text from the first "(" onwards is kept unchanged.
     *
     * The result may have leading/trailing spaces; the caller trims them.
     *
     * @param string $data
     *
     * @return string
     */
    private static function reformatDate(string $data): string
    {
        // Preserve text from INT dates
        if (strpos($data, '(') !== false) {
            [$date, $text] = explode('(', $data, 2);
            $text = ' (' . $text;
        } else {
            $date = $data;
            $text = '';
        }

        // Capitals
        $date = strtoupper($date);
        // Temporarily add leading/trailing spaces, to allow efficient matching below
        $date = " {$date} ";
        // Ensure space between digits and letters
        $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
        $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
        // Ensure space before/after calendar escapes
        $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
        // "BET." => "BET"
        $date = preg_replace('/(\w\w)\./', '$1', $date);
        // "CIR" => "ABT"
        $date = str_replace(' CIR ', ' ABT ', $date);
        $date = str_replace(' APX ', ' ABT ', $date);
        // B.C. => BC (temporarily, to allow easier handling of ".")
        $date = str_replace(' B.C. ', ' BC ', $date);
        // TMG uses "EITHER X OR Y"
        $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
        // "BET X - Y " => "BET X AND Y"
        $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
        $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
        // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
        $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
        $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
        // "@#ESC@ AFT X" => "AFT @#ESC@ X"
        $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
        // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
        // (don't change "/" - it is used in NS/OS dates)
        $date = preg_replace('/[.,:;-]/', ' ', $date);
        // BC => B.C.
        $date = str_replace(' BC ', ' B.C. ', $date);

        // Append the "INT" text
        return $date . $text;
    }

    /**
     * Normalise the value of a PLAC line: consistent commas, and a trailing
     * coordinate block converted into subordinate MAP/LATI/LONG lines.
     *
     * @param string $level the level of the PLAC line
     * @param string $data
     *
     * @return string the place name, optionally followed by "\n"-separated MAP/LATI/LONG lines
     */
    private static function reformatPlace(string $level, string $data): string
    {
        // Consistent commas
        $data = preg_replace('/ *, */', ', ', $data);

        // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
        if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) {
            $map_level = (int) $level + 1;
            // Degrees, minutes and seconds => decimal degrees, to 4 decimal places
            $latitude  = round((int) $match[2] + ((int) $match[3] / 60) + ((int) $match[4] / 3600), 4);
            $longitude = round((int) $match[6] + ((int) $match[7] / 60) + ((int) $match[8] / 3600), 4);

            $data =
                $match[1] . "\n" .
                $map_level . " MAP\n" .
                ($map_level + 1) . ' LATI ' . $match[5] . $latitude . "\n" .
                ($map_level + 1) . ' LONG ' . $match[9] . $longitude;
        }

        return $data;
    }
595
596
    /**
597
     * import record into database
598
     * this function will parse the given gedcom record and add it to the database
599
     *
600
     * @param string $gedrec the raw gedcom record to parse
601
     * @param Tree   $tree   import the record into this tree
602
     * @param bool   $update whether or not this is an updated record that has been accepted
603
     *
604
     * @return void
605
     * @throws GedcomErrorException
606
     */
607
    public static function importRecord($gedrec, Tree $tree, $update): void
608
    {
609
        $tree_id = $tree->id();
610
611
        // Escaped @ signs (only if importing from file)
612
        if (!$update) {
613
            $gedrec = str_replace('@@', '@', $gedrec);
614
        }
615
616
        // Standardise gedcom format
617
        $gedrec = self::reformatRecord($gedrec, $tree);
618
619
        // import different types of records
620
        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
621
            [, $xref, $type] = $match;
622
            // check for a _UID, if the record doesn't have one, add one
623
            if ($tree->getPreference('GENERATE_UIDS') && !strpos($gedrec, "\n1 _UID ")) {
624
                $gedrec .= "\n1 _UID " . GedcomTag::createUid();
625
            }
626
        } elseif (preg_match('/0 (HEAD|TRLR)/', $gedrec, $match)) {
627
            $type = $match[1];
628
            $xref = $type; // For HEAD/TRLR, use type as pseudo XREF.
629
        } else {
630
            throw new GedcomErrorException($gedrec);
631
        }
632
633
        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
634
        // using an application which does not support (and deletes) media objects, then add them
635
        // back in.
636
        if ($tree->getPreference('keep_media') && $xref) {
637
            $old_linked_media = DB::table('link')
638
                ->where('l_from', '=', $xref)
639
                ->where('l_file', '=', $tree_id)
640
                ->where('l_type', '=', 'OBJE')
641
                ->pluck('l_to');
642
643
            foreach ($old_linked_media as $media_id) {
644
                $gedrec .= "\n1 OBJE @" . $media_id . '@';
645
            }
646
        }
647
648
        switch ($type) {
649
            case 'INDI':
650
                // Convert inline media into media objects
651
                $gedrec = self::convertInlineMedia($tree, $gedrec);
652
653
                $record = new Individual($xref, $gedrec, null, $tree);
654
                if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) {
655
                    $rin = $match[1];
656
                } else {
657
                    $rin = $xref;
658
                }
659
660
                DB::table('individuals')->insert([
661
                    'i_id'     => $xref,
662
                    'i_file'   => $tree_id,
663
                    'i_rin'    => $rin,
664
                    'i_sex'    => $record->sex(),
665
                    'i_gedcom' => $gedrec,
666
                ]);
667
668
                // Update the cross-reference/index tables.
669
                self::updatePlaces($xref, $tree, $gedrec);
670
                self::updateDates($xref, $tree_id, $gedrec);
671
                self::updateLinks($xref, $tree_id, $gedrec);
672
                self::updateNames($xref, $tree_id, $record);
673
                break;
674
            case 'FAM':
675
                // Convert inline media into media objects
676
                $gedrec = self::convertInlineMedia($tree, $gedrec);
677
678
                if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
679
                    $husb = $match[1];
680
                } else {
681
                    $husb = '';
682
                }
683
                if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
684
                    $wife = $match[1];
685
                } else {
686
                    $wife = '';
687
                }
688
                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
689
                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
690
                    $nchi = max($nchi, $match[1]);
691
                }
692
693
                DB::table('families')->insert([
694
                    'f_id'      => $xref,
695
                    'f_file'    => $tree_id,
696
                    'f_husb'    => $husb,
697
                    'f_wife'    => $wife,
698
                    'f_gedcom'  => $gedrec,
699
                    'f_numchil' => $nchi,
700
                ]);
701
702
                // Update the cross-reference/index tables.
703
                self::updatePlaces($xref, $tree, $gedrec);
704
                self::updateDates($xref, $tree_id, $gedrec);
705
                self::updateLinks($xref, $tree_id, $gedrec);
706
                break;
707
            case 'SOUR':
708
                // Convert inline media into media objects
709
                $gedrec = self::convertInlineMedia($tree, $gedrec);
710
711
                $record = new Source($xref, $gedrec, null, $tree);
712
                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) {
713
                    $name = $match[1];
714
                } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
715
                    $name = $match[1];
716
                } else {
717
                    $name = $xref;
718
                }
719
720
                DB::table('sources')->insert([
721
                    's_id'     => $xref,
722
                    's_file'   => $tree_id,
723
                    's_name'   => mb_substr($name, 0, 255),
724
                    's_gedcom' => $gedrec,
725
                ]);
726
727
                // Update the cross-reference/index tables.
728
                self::updateLinks($xref, $tree_id, $gedrec);
729
                self::updateNames($xref, $tree_id, $record);
730
                break;
731
            case 'REPO':
732
                // Convert inline media into media objects
733
                $gedrec = self::convertInlineMedia($tree, $gedrec);
734
735
                $record = new Repository($xref, $gedrec, null, $tree);
736
737
                DB::table('other')->insert([
738
                    'o_id'     => $xref,
739
                    'o_file'   => $tree_id,
740
                    'o_type'   => 'REPO',
741
                    'o_gedcom' => $gedrec,
742
                ]);
743
744
                // Update the cross-reference/index tables.
745
                self::updateLinks($xref, $tree_id, $gedrec);
746
                self::updateNames($xref, $tree_id, $record);
747
                break;
748
            case 'NOTE':
749
                $record = new Note($xref, $gedrec, null, $tree);
750
751
                DB::table('other')->insert([
752
                    'o_id'     => $xref,
753
                    'o_file'   => $tree_id,
754
                    'o_type'   => 'NOTE',
755
                    'o_gedcom' => $gedrec,
756
                ]);
757
758
                // Update the cross-reference/index tables.
759
                self::updateLinks($xref, $tree_id, $gedrec);
760
                self::updateNames($xref, $tree_id, $record);
761
                break;
762
            case 'OBJE':
763
                $record = new Media($xref, $gedrec, null, $tree);
764
765
                DB::table('media')->insert([
766
                    'm_id'     => $xref,
767
                    'm_file'   => $tree_id,
768
                    'm_gedcom' => $gedrec,
769
                ]);
770
771
                foreach ($record->mediaFiles() as $media_file) {
772
                    DB::table('media_file')->insert([
773
                        'm_id'                 => $xref,
774
                        'm_file'               => $tree_id,
775
                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
776
                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
777
                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
778
                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
779
                    ]);
780
                }
781
782
                // Update the cross-reference/index tables.
783
                self::updateLinks($xref, $tree_id, $gedrec);
784
                self::updateNames($xref, $tree_id, $record);
785
                break;
786
            default: // HEAD, TRLR, SUBM, SUBN, and custom record types.
787
                // Force HEAD records to have a creation date.
788
                if ($type === 'HEAD' && strpos($gedrec, "\n1 DATE ") === false) {
789
                    $gedrec .= "\n1 DATE " . date('j M Y');
790
                }
791
792
                DB::table('other')->insert([
793
                    'o_id'     => $xref,
794
                    'o_file'   => $tree_id,
795
                    'o_type'   => mb_substr($type, 0, 15),
796
                    'o_gedcom' => $gedrec,
797
                ]);
798
799
                // Update the cross-reference/index tables.
800
                self::updateLinks($xref, $tree_id, $gedrec);
801
                break;
802
        }
803
    }
804
805
    /**
     * Link a record to every place that it mentions.
     *
     * Every PLAC line at level 2 or deeper is collected. Each distinct place is
     * linked to the record, and so is each place reached by repeatedly calling
     * Place::parent(), until a place whose id is zero is reached.
     *
     * @param string $xref   The XREF of the record being indexed
     * @param Tree   $tree   The tree that contains the record
     * @param string $gedrec The raw GEDCOM text of the record
     *
     * @return void
     */
    public static function updatePlaces(string $xref, Tree $tree, string $gedrec): void
    {
        preg_match_all('/^[2-9] PLAC (.+)/m', $gedrec, $found);

        // The same place is often repeated on several facts - process each name once.
        foreach (array_unique($found[1]) as $full_name) {
            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
            // Walk from the most specific place up through its parents, linking each one.
            for ($node = new Place($full_name, $tree); $node->id() !== 0; $node = $node->parent()) {
                try {
                    DB::table('placelinks')->insert([
                        'pl_p_id' => $node->id(),
                        'pl_gid'  => $xref,
                        'pl_file' => $tree->id(),
                    ]);
                } catch (PDOException $ex) {
                    // Already linked this place - so presumably also any parent places.
                    break;
                }
            }
        }
    }
841
842
    /**
     * Extract all the dates from the given record and insert them into the "dates" table.
     *
     * Only level 1 facts that own a level 2 DATE line are indexed. A FACT or EVEN
     * whose "2 TYPE" is three to five upper-case letters is indexed under that
     * TYPE instead. One row is written when the minimum and maximum dates are the
     * same object; otherwise one row is written for each end of the date.
     *
     * @param string $xref   The XREF of the record being indexed
     * @param int    $ged_id The ID of the tree that contains the record
     * @param string $gedrec The raw GEDCOM text of the record
     *
     * @return void
     */
    public static function updateDates($xref, $ged_id, $gedrec): void
    {
        // Cheap pre-check, to avoid running the regular expression on records without dates.
        // strpos() returns 0 (falsy) for a match at offset zero, so compare against false explicitly.
        if (strpos($gedrec, '2 DATE ') === false) {
            return;
        }

        if (!preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*(?:\n2 DATE (.+))(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER)) {
            return;
        }

        foreach ($matches as $match) {
            $fact = $match[1];

            // Generic facts/events are indexed by their TYPE, when it looks like a tag.
            if (($fact === 'FACT' || $fact === 'EVEN') && preg_match("/\n2 TYPE ([A-Z]{3,5})/", $match[0], $tmatch)) {
                $fact = $tmatch[1];
            }

            $date    = new Date($match[2]);
            $minimum = $date->minimumDate();
            $maximum = $date->maximumDate();

            // Identical end-points need one row. Anything else needs a row for each end.
            $endpoints = $minimum === $maximum ? [$minimum] : [$minimum, $maximum];

            // NOTE(review): both rows have always taken d_type from the minimum date.
            // This is preserved here - confirm that it is intended.
            $type = $minimum->format('%@');

            foreach ($endpoints as $endpoint) {
                DB::table('dates')->insert([
                    'd_day'        => $endpoint->day,
                    'd_month'      => $endpoint->format('%O'),
                    'd_mon'        => $endpoint->month,
                    'd_year'       => $endpoint->year,
                    'd_julianday1' => $endpoint->minimumJulianDay(),
                    'd_julianday2' => $endpoint->maximumJulianDay(),
                    'd_fact'       => $fact,
                    'd_gid'        => $xref,
                    'd_file'       => $ged_id,
                    'd_type'       => $type,
                ]);
            }
        }
    }
890
891
    /**
     * Extract all the links from the given record and insert them into the "link" table.
     *
     * A link is any line of the form "<level> <TAG> @<XREF>@". Each distinct
     * combination of tag and target is inserted once only.
     *
     * @param string $xref   The XREF of the record that contains the links
     * @param int    $ged_id The ID of the tree that contains the record
     * @param string $gedrec The raw GEDCOM text of the record
     *
     * @return void
     */
    public static function updateLinks($xref, $ged_id, $gedrec): void
    {
        $pattern = '/^\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/m';

        if (!preg_match_all($pattern, $gedrec, $matches, PREG_SET_ORDER)) {
            return;
        }

        // Links already processed, indexed by tag and then by target.
        // Two separate keys cannot collide, unlike a concatenated "tag + target" string
        // (e.g. "FAMC" + "X1" and "FAM" + "CX1"), and the lookup takes constant time.
        $seen = [];

        foreach ($matches as $match) {
            $tag    = $match[1];
            $target = $match[2];

            // Include each link once only.
            if (isset($seen[$tag][$target])) {
                continue;
            }

            $seen[$tag][$target] = true;

            try {
                DB::table('link')->insert([
                    'l_from' => $xref,
                    'l_to'   => $target,
                    'l_type' => $tag,
                    'l_file' => $ged_id,
                ]);
            } catch (PDOException $ex) {
                // Ignore any errors, which may be caused by "duplicates" that differ on case/collation, e.g. "S1" and "s1"
            }
        }
    }
922
923
    /**
     * Extract all the names from the given record and insert them into the "name" table.
     *
     * Every record gets the type, sort and full name columns. Individuals also
     * get the surname/given-name columns and their soundex codes. The soundex
     * codes are left NULL for the "@P.N." given-name and "@N.N." surname markers.
     *
     * @param string       $xref   The XREF of the record being indexed
     * @param int          $ged_id The ID of the tree that contains the record
     * @param GedcomRecord $record The record whose names are to be indexed
     *
     * @return void
     */
    public static function updateNames($xref, $ged_id, GedcomRecord $record): void
    {
        // The record type cannot change while we iterate over its names.
        $is_individual = $record instanceof Individual;

        foreach ($record->getAllNames() as $num => $parts) {
            // Columns common to all record types.
            $row = [
                'n_file' => $ged_id,
                'n_id'   => $xref,
                'n_num'  => $num,
                'n_type' => $parts['type'],
                'n_sort' => mb_substr($parts['sort'], 0, 255),
                'n_full' => mb_substr($parts['fullNN'], 0, 255),
            ];

            if ($is_individual) {
                $unknown_givn = $parts['givn'] === '@P.N.';
                $unknown_surn = $parts['surn'] === '@N.N.';

                // The surname test uses 'surn', but the soundex is calculated from 'surname'.
                $row += [
                    'n_surname'          => mb_substr($parts['surname'], 0, 255),
                    'n_surn'             => mb_substr($parts['surn'], 0, 255),
                    'n_givn'             => mb_substr($parts['givn'], 0, 255),
                    'n_soundex_givn_std' => $unknown_givn ? null : Soundex::russell($parts['givn']),
                    'n_soundex_surn_std' => $unknown_surn ? null : Soundex::russell($parts['surname']),
                    'n_soundex_givn_dm'  => $unknown_givn ? null : Soundex::daitchMokotoff($parts['givn']),
                    'n_soundex_surn_dm'  => $unknown_surn ? null : Soundex::daitchMokotoff($parts['surname']),
                ];
            }

            DB::table('name')->insert($row);
        }
    }
977
978
    /**
     * Replace inline media data with links to media objects.
     *
     * An inline media item is an "OBJE" line with no XREF, followed by at least
     * one subordinate line. Items at levels 1, 2 and 3 are converted, in that
     * order, and each is replaced by the link that createMediaObject() returns.
     *
     * @param Tree   $tree   The tree that contains the record
     * @param string $gedrec The raw GEDCOM text of the record
     *
     * @return string The GEDCOM text, with inline media replaced by links
     */
    public static function convertInlineMedia(Tree $tree, $gedrec): string
    {
        // For each level: the OBJE line, plus one or more deeper lines that belong to it.
        $patterns = [
            1 => '/\n1 OBJE(?:\n[2-9].+)+/',
            2 => '/\n2 OBJE(?:\n[3-9].+)+/',
            3 => '/\n3 OBJE(?:\n[4-9].+)+/',
        ];

        foreach ($patterns as $level => $pattern) {
            // Each replacement removes at least one inline item, so this loop terminates.
            while (preg_match($pattern, $gedrec, $inline)) {
                $link   = self::createMediaObject($level, $inline[0], $tree);
                $gedrec = str_replace($inline[0], $link, $gedrec);
            }
        }

        return $gedrec;
    }
1000
1001
    /**
     * Create a new media object from inline media data, or re-use an existing one.
     *
     * If the tree already has a media file with the same title and filename, the
     * existing object is re-used. Otherwise the inline data is renumbered so that
     * it starts at level 0, converted to the FILE/FORM/TITL layout, and stored as
     * a new media object.
     *
     * @param int    $level  The level of the inline OBJE line
     * @param string $gedrec The inline data: "\n<level> OBJE" and its subordinate lines
     * @param Tree   $tree   The tree that contains the record
     *
     * @return string A link to the media object: "\n<level> OBJE @<xref>@"
     */
    public static function createMediaObject($level, $gedrec, Tree $tree): string
    {
        $file  = preg_match('/\n\d FILE (.+)/', $gedrec, $file_match) ? $file_match[1] : '';
        $title = preg_match('/\n\d TITL (.+)/', $gedrec, $title_match) ? $title_match[1] : '';

        // Have we already created a media object with the same title/filename?
        // Both columns are truncated to 248 characters when they are stored, so truncate
        // both search values too. Otherwise a long title never matches its own row, and
        // every occurrence creates another duplicate object.
        $xref = DB::table('media_file')
            ->where('m_file', '=', $tree->id())
            ->where('descriptive_title', '=', mb_substr($title, 0, 248))
            ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248))
            ->value('m_id');

        if ($xref === null) {
            $xref = $tree->getNewXref();

            // Renumber the lines, so that the OBJE line becomes level 0.
            $gedrec = preg_replace_callback('/\n(\d+)/', static function (array $m) use ($level): string {
                return "\n" . ($m[1] - $level);
            }, $gedrec);

            // Convert to an object. This also removes the leading line-break.
            $gedrec = str_replace("\n0 OBJE\n", '0 @' . $xref . "@ OBJE\n", $gedrec);

            // Fix Legacy GEDCOMS
            $gedrec = preg_replace('/\n1 FORM (.+)\n1 FILE (.+)\n1 TITL (.+)/', "\n1 FILE $2\n2 FORM $1\n2 TITL $3", $gedrec);

            // Fix FTB GEDCOMS
            $gedrec = preg_replace('/\n1 FORM (.+)\n1 TITL (.+)\n1 FILE (.+)/', "\n1 FILE $3\n2 FORM $1\n2 TITL $2", $gedrec);

            // Fix RM7 GEDCOMS
            $gedrec = preg_replace('/\n1 FILE (.+)\n1 FORM (.+)\n1 TITL (.+)/', "\n1 FILE $1\n2 FORM $2\n2 TITL $3", $gedrec);

            // Create new record
            $record = new Media($xref, $gedrec, null, $tree);

            DB::table('media')->insert([
                'm_id'     => $xref,
                'm_file'   => $tree->id(),
                'm_gedcom' => $gedrec,
            ]);

            // Index each file of the new object, so that later inline copies can find it.
            foreach ($record->mediaFiles() as $media_file) {
                DB::table('media_file')->insert([
                    'm_id'                 => $xref,
                    'm_file'               => $tree->id(),
                    'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                    'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                    'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                    'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                ]);
            }
        }

        return "\n" . $level . ' OBJE @' . $xref . '@';
    }
1072
1073
    /**
     * Update (or delete) a record in the database.
     *
     * All the rows that were generated from the old version of the record are
     * removed: place links, places that are no longer linked, dates, names, links
     * and the record itself. Unless the record is being deleted, the new GEDCOM
     * text is then imported again.
     *
     * @param string $gedrec The raw GEDCOM text of the record
     * @param Tree   $tree   The tree that contains the record
     * @param bool   $delete True to delete the record; false to replace it
     *
     * @return void
     * @throws GedcomErrorException If the first line is not "0 @XREF@ TAG" or "0 HEAD"
     */
    public static function updateRecord($gedrec, Tree $tree, bool $delete): void
    {
        $header_pattern = '/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/';

        if (preg_match($header_pattern, $gedrec, $match)) {
            [, $gid, $type] = $match;
        } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) {
            // The HEAD record has no XREF.  Any others?
            $gid = $type = $match[1];
        } else {
            throw new GedcomErrorException($gedrec);
        }

        $tree_id = $tree->id();

        // Place links
        DB::table('placelinks')
            ->where('pl_gid', '=', $gid)
            ->where('pl_file', '=', $tree_id)
            ->delete();

        // Orphaned places.  If we're deleting  "Westminster, London, England",
        // then we may also need to delete "London, England" and "England".
        // Repeat until a pass deletes nothing.
        do {
            $affected = DB::table('places')
                ->leftJoin('placelinks', static function (JoinClause $join): void {
                    $join
                        ->on('p_id', '=', 'pl_p_id')
                        ->on('p_file', '=', 'pl_file');
                })
                ->whereNull('pl_p_id')
                ->delete();
        } while ($affected > 0);

        // Index tables: [table, XREF column, tree column]
        $index_tables = [
            ['dates', 'd_gid', 'd_file'],
            ['name', 'n_id', 'n_file'],
            ['link', 'l_from', 'l_file'],
        ];

        // Tables that hold the record itself. Any type not listed here is stored in "other".
        $record_tables = [
            'INDI' => [['individuals', 'i_id', 'i_file']],
            'FAM'  => [['families', 'f_id', 'f_file']],
            'SOUR' => [['sources', 's_id', 's_file']],
            'OBJE' => [['media_file', 'm_id', 'm_file'], ['media', 'm_id', 'm_file']],
        ];

        $targets = array_merge($index_tables, $record_tables[$type] ?? [['other', 'o_id', 'o_file']]);

        foreach ($targets as [$table, $xref_column, $tree_column]) {
            DB::table($table)
                ->where($xref_column, '=', $gid)
                ->where($tree_column, '=', $tree_id)
                ->delete();
        }

        if (!$delete) {
            self::importRecord($gedrec, $tree, true);
        }
    }
1175
}
1176