1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* webtrees: online genealogy |
5
|
|
|
* Copyright (C) 2025 webtrees development team |
6
|
|
|
* This program is free software: you can redistribute it and/or modify |
7
|
|
|
* it under the terms of the GNU General Public License as published by |
8
|
|
|
* the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
* (at your option) any later version. |
10
|
|
|
* This program is distributed in the hope that it will be useful, |
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13
|
|
|
* GNU General Public License for more details. |
14
|
|
|
* You should have received a copy of the GNU General Public License |
15
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>. |
16
|
|
|
*/ |
17
|
|
|
|
18
|
|
|
declare(strict_types=1); |
19
|
|
|
|
20
|
|
|
namespace Fisharebest\Webtrees\Services; |
21
|
|
|
|
22
|
|
|
use Fisharebest\Webtrees\Date; |
23
|
|
|
use Fisharebest\Webtrees\DB; |
|
|
|
|
24
|
|
|
use Fisharebest\Webtrees\Exceptions\GedcomErrorException; |
25
|
|
|
use Fisharebest\Webtrees\Family; |
|
|
|
|
26
|
|
|
use Fisharebest\Webtrees\Gedcom; |
27
|
|
|
use Fisharebest\Webtrees\Header; |
|
|
|
|
28
|
|
|
use Fisharebest\Webtrees\Individual; |
|
|
|
|
29
|
|
|
use Fisharebest\Webtrees\Location; |
|
|
|
|
30
|
|
|
use Fisharebest\Webtrees\Media; |
|
|
|
|
31
|
|
|
use Fisharebest\Webtrees\Note; |
|
|
|
|
32
|
|
|
use Fisharebest\Webtrees\Place; |
33
|
|
|
use Fisharebest\Webtrees\PlaceLocation; |
34
|
|
|
use Fisharebest\Webtrees\Registry; |
35
|
|
|
use Fisharebest\Webtrees\Repository; |
|
|
|
|
36
|
|
|
use Fisharebest\Webtrees\Soundex; |
|
|
|
|
37
|
|
|
use Fisharebest\Webtrees\Source; |
|
|
|
|
38
|
|
|
use Fisharebest\Webtrees\Submission; |
|
|
|
|
39
|
|
|
use Fisharebest\Webtrees\Submitter; |
|
|
|
|
40
|
|
|
use Fisharebest\Webtrees\Tree; |
41
|
|
|
use Illuminate\Database\Query\JoinClause; |
42
|
|
|
|
43
|
|
|
use function array_chunk; |
44
|
|
|
use function array_intersect_key; |
45
|
|
|
use function array_map; |
46
|
|
|
use function array_unique; |
47
|
|
|
use function array_values; |
48
|
|
|
use function date; |
49
|
|
|
use function explode; |
50
|
|
|
use function max; |
51
|
|
|
use function mb_substr; |
52
|
|
|
use function preg_match; |
53
|
|
|
use function preg_match_all; |
54
|
|
|
use function preg_replace; |
55
|
|
|
use function round; |
56
|
|
|
use function str_contains; |
57
|
|
|
use function str_replace; |
58
|
|
|
use function str_starts_with; |
59
|
|
|
use function strlen; |
60
|
|
|
use function strtoupper; |
61
|
|
|
use function strtr; |
62
|
|
|
use function substr; |
63
|
|
|
use function trim; |
64
|
|
|
|
65
|
|
|
use const PREG_SET_ORDER; |
66
|
|
|
|
67
|
|
|
/** |
68
|
|
|
* Class GedcomImportService - import GEDCOM data |
69
|
|
|
*/ |
70
|
|
|
class GedcomImportService |
71
|
|
|
{ |
72
|
|
|
/**
 * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
 *
 * The record is split into lines of "level + optional xref + tag + optional data".
 * Tags are canonicalised, DATE/NAME/PLAC/SEX/FILE values are normalised, a "Y" on a
 * level 1 line is dropped when a DATE or PLAC follows, CONC lines are merged into the
 * preceding line, and the remaining lines are re-joined with "\n".
 *
 * @param string $rec  the raw gedcom record
 * @param Tree   $tree supplies the GEDCOM_MEDIA_PATH and WORD_WRAPPED_NOTES preferences
 *
 * @return string the reformatted record
 */
private function reformatRecord(string $rec, Tree $tree): string
{
    $gedcom_service = Registry::container()->get(GedcomService::class);

    // Strip out mac/msdos line endings
    $rec = preg_replace("/[\r\n]+/", "\n", $rec);

    // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
    $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

    // Process the record line-by-line
    $newrec = '';

    foreach ($matches as $n => $match) {
        [, $level, $xref, $tag, $data] = $match;

        $tag = $gedcom_service->canonicalTag($tag);

        switch ($tag) {
            case 'DATE':
                // Preserve text from INT dates
                if (str_contains($data, '(')) {
                    [$date, $text] = explode('(', $data, 2);
                    $text = ' (' . $text;
                } else {
                    $date = $data;
                    $text = '';
                }
                // Capitals
                $date = strtoupper($date);
                // Temporarily add leading/trailing spaces, to allow efficient matching below
                $date = ' ' . $date . ' ';
                // Ensure a space between digits and letters
                $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
                $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
                // Ensure space before/after calendar escapes
                $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
                // "BET." => "BET"
                $date = preg_replace('/(\w\w)\./', '$1', $date);
                // "CIR" => "ABT"
                $date = str_replace(' CIR ', ' ABT ', $date);
                $date = str_replace(' APX ', ' ABT ', $date);
                // B.C. => BC (temporarily, to allow easier handling of ".")
                $date = str_replace(' B.C. ', ' BC ', $date);
                // TMG uses "EITHER X OR Y"
                $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
                // "BET X - Y " => "BET X AND Y"
                $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
                $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
                // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
                $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
                $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
                // "@#ESC@ AFT X" => "AFT @#ESC@ X"
                $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
                // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
                // (don't change "/" - it is used in NS/OS dates)
                $date = preg_replace('/[.,:;-]/', ' ', $date);
                // BC => B.C.
                $date = str_replace(' BC ', ' B.C. ', $date);
                // Append the "INT" text
                $data = $date . $text;
                break;

            case 'HEAD':
            case 'TRLR':
                // HEAD and TRLR records do not have an XREF or DATA
                if ($level === '0') {
                    $xref = '';
                    $data = '';
                }
                break;

            case 'NAME':
                // Trim, and collapse runs of spaces
                $data = preg_replace('/ +/', ' ', trim($data));
                break;

            case 'PLAC':
                // Consistent commas: ASCII comma, fullwidth comma (U+FF0C), Arabic comma (U+060C).
                // NOTE(review): the reviewed text showed the ASCII comma twice; the second entry is
                // presumed to be a fullwidth comma lost in transcription - confirm against the repository.
                $data = preg_replace('/ *[,\x{FF0C}\x{060C}] */u', ', ', $data);
                // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
                if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match) === 1) {
                    $degns = (int) $match[2];
                    $minns = (int) $match[3];
                    $secns = (int) $match[4];
                    $degew = (int) $match[6];
                    $minew = (int) $match[7];
                    $secew = (int) $match[8];
                    // Move the coordinates into MAP/LATI/LONG sub-lines, as decimal degrees
                    $data =
                        $match[1] . "\n" .
                        (1 + (int) $level) . " MAP\n" .
                        (2 + (int) $level) . ' LATI ' . ($match[5] . round($degns + $minns / 60 + $secns / 3600, 4)) . "\n" .
                        (2 + (int) $level) . ' LONG ' . ($match[9] . round($degew + $minew / 60 + $secew / 3600, 4));
                }
                break;

            case 'SEX':
                $data = strtoupper($data);
                break;
        }

        // Suppress "Y", for facts/events with a DATE or PLAC
        if ($data === 'y') {
            $data = 'Y';
        }

        if ($level === '1' && $data === 'Y') {
            // Scan the sub-lines of this fact, up to and including the last line of the record.
            // (The bound was previously "$num_matches - 1", which skipped the final line.)
            for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
                    $data = '';
                    break;
                }
            }
        }

        // Final, tag-specific tidying before the line is reassembled
        switch ($tag) {
            case 'CONC':
                // Merge CONC lines into the previous line, to simplify access later on.
                $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
                continue 2;

            case 'NOTE':
            case 'TEXT':
            case 'DATA':
            case 'CONT':
                // Free text: whitespace is left untouched
                break;

            case 'FILE':
                // Strip off the user-defined path prefix
                $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
                if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
                    $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
                }
                // convert backslashes in filenames to forward slashes
                $data = preg_replace("/\\\\/", '/', $data);
                break;

            default:
                // Remove tabs and multiple/leading/trailing spaces.
                // An explicit quantifier is used so that the "two or more spaces" intent
                // cannot be lost if whitespace in this file is ever collapsed.
                $data = strtr($data, ["\t" => ' ']);
                $data = trim($data, ' ');
                $data = preg_replace('/ {2,}/', ' ', $data) ?? $data;
                break;
        }

        // Reassemble components back into a single line. Only level 0 lines keep their XREF.
        $newrec .=
            ($newrec !== '' ? "\n" : '') .
            $level . ' ' .
            ($level === '0' && $xref !== '' ? $xref . ' ' : '') .
            $tag .
            ($data === '' ? '' : ' ' . $data);
    }

    return $newrec;
}
219
|
|
|
|
220
|
|
|
/**
 * Import a record into the database.
 *
 * The raw record is reformatted, its type and XREF are identified, and it is written
 * to the table for that type together with its place/date/name/link index rows.
 *
 * @param string $gedrec the raw gedcom record to parse
 * @param Tree   $tree   import the record into this tree
 * @param bool   $update whether this is an updated record that has been accepted
 *
 * @throws GedcomErrorException
 */
public function importRecord(string $gedrec, Tree $tree, bool $update): void
{
    $tree_id = $tree->id();

    // Escaped @ signs (only if importing from file)
    if ($update === false) {
        $gedrec = str_replace('@@', '@', $gedrec);
    }

    // Standardise gedcom format
    $gedrec = $this->reformatRecord($gedrec, $tree);

    // Identify the record: "0 @XREF@ TYPE", or one of the records that have no XREF
    $header_pattern = '/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/';

    if (preg_match($header_pattern, $gedrec, $match)) {
        $xref = $match[1];
        $type = $match[2];
    } elseif (str_starts_with($gedrec, '0 HEAD')) {
        // For records without an XREF, use the type as a pseudo XREF.
        $xref = 'HEAD';
        $type = 'HEAD';
    } elseif (str_starts_with($gedrec, '0 TRLR')) {
        $tree->setPreference('imported', '1');
        // For records without an XREF, use the type as a pseudo XREF.
        $xref = 'TRLR';
        $type = 'TRLR';
    } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) {
        $this->importLegacyPlacDefn($gedrec);

        return;
    } elseif (str_starts_with($gedrec, '0 _PLAC ')) {
        $this->importTNGPlac($gedrec);

        return;
    } else {
        // Known custom records without XREFs are silently skipped; anything else is an error.
        foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) {
            if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) {
                return;
            }
        }

        throw new GedcomErrorException($gedrec);
    }

    // Add a _UID
    if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
        $gedrec .= "\n1 _UID " . Registry::idFactory()->pafUid();
    }

    // If the user has downloaded their GEDCOM data (containing media objects) and edited it
    // using an application which does not support (and deletes) media objects, then add them
    // back in.
    if ($tree->getPreference('keep_media') === '1') {
        $media_links = static fn () => DB::table('link')
            ->where('l_from', '=', $xref)
            ->where('l_file', '=', $tree_id)
            ->where('l_type', '=', 'OBJE');

        $old_linked_media = $media_links()->pluck('l_to');

        // Delete these links - so that we do not insert them again in updateLinks()
        $media_links()->delete();

        foreach ($old_linked_media as $media_id) {
            $gedrec .= "\n1 OBJE @" . $media_id . '@';
        }
    }

    // Convert inline media into media objects
    $gedrec = $this->convertInlineMedia($tree, $gedrec);

    switch ($type) {
        case Individual::RECORD_TYPE:
            $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

            $rin = preg_match('/\n1 RIN (.+)/', $gedrec, $match) ? $match[1] : $xref;

            // The database can only store MFU, and many of the stats queries assume this.
            $sex = match ($record->sex()) {
                'M'     => 'M',
                'F'     => 'F',
                default => 'U',
            };

            DB::table('individuals')->insert([
                'i_id'     => $xref,
                'i_file'   => $tree_id,
                'i_rin'    => $rin,
                'i_sex'    => $sex,
                'i_gedcom' => $gedrec,
            ]);

            // Update the cross-reference/index tables.
            $this->updatePlaces($xref, $tree, $gedrec);
            $this->updateDates($xref, $tree_id, $gedrec);
            $this->updateNames($xref, $tree_id, $record);
            break;

        case Family::RECORD_TYPE:
            $husb = preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match) ? $match[1] : '';
            $wife = preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match) ? $match[1] : '';

            // Number of children: the larger of the CHIL count and any NCHI value
            $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);

            if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
                $nchi = max($nchi, $match[1]);
            }

            DB::table('families')->insert([
                'f_id'      => $xref,
                'f_file'    => $tree_id,
                'f_husb'    => $husb,
                'f_wife'    => $wife,
                'f_gedcom'  => $gedrec,
                'f_numchil' => $nchi,
            ]);

            // Update the cross-reference/index tables.
            $this->updatePlaces($xref, $tree, $gedrec);
            $this->updateDates($xref, $tree_id, $gedrec);
            break;

        case Source::RECORD_TYPE:
            // Prefer the title, then the abbreviation, then the XREF
            if (preg_match('/\n1 TITL (.+)/', $gedrec, $match) || preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
                $name = $match[1];
            } else {
                $name = $xref;
            }

            DB::table('sources')->insert([
                's_id'     => $xref,
                's_file'   => $tree_id,
                's_name'   => mb_substr($name, 0, 255),
                's_gedcom' => $gedrec,
            ]);
            break;

        case Repository::RECORD_TYPE:
        case Note::RECORD_TYPE:
        case Submission::RECORD_TYPE:
        case Submitter::RECORD_TYPE:
        case Location::RECORD_TYPE:
            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => $type,
                'o_gedcom' => $gedrec,
            ]);
            break;

        case Header::RECORD_TYPE:
            // Force HEAD records to have a creation date.
            if (!str_contains($gedrec, "\n1 DATE ")) {
                $gedrec .= "\n1 DATE " . strtoupper(date('d M Y'));
            }

            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => Header::RECORD_TYPE,
                'o_gedcom' => $gedrec,
            ]);
            break;

        case Media::RECORD_TYPE:
            $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

            DB::table('media')->insert([
                'm_id'     => $xref,
                'm_file'   => $tree_id,
                'm_gedcom' => $gedrec,
            ]);

            // One row per media file, with values truncated before they are stored
            foreach ($record->mediaFiles() as $media_file) {
                DB::table('media_file')->insert([
                    'm_id'                 => $xref,
                    'm_file'               => $tree_id,
                    'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                    'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                    'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                    'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                ]);
            }
            break;

        default:
            // Custom record types.
            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => mb_substr($type, 0, 15),
                'o_gedcom' => $gedrec,
            ]);
            break;
    }

    // Update the cross-reference/index tables.
    $this->updateLinks($xref, $tree_id, $gedrec);
}
437
|
|
|
|
438
|
|
|
/**
 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values.
 *
 * The record is ignored unless it has a level 1 PLAC, a level 3 LATI and a level 3 LONG.
 * Coordinates are only stored when the location has neither a latitude nor a longitude.
 *
 * @param string $gedcom the reformatted _PLAC_DEFN record
 */
private function importLegacyPlacDefn(string $gedcom): void
{
    $gedcom_service = new GedcomService();

    if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match) !== 1) {
        return;
    }

    $place_name = $match[1];

    if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match) !== 1) {
        return;
    }

    $latitude = $gedcom_service->readLatitude($match[1]);

    if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match) !== 1) {
        return;
    }

    $longitude = $gedcom_service->readLongitude($match[1]);

    $location = new PlaceLocation($place_name);

    // Leave any coordinates that are already present untouched
    if ($location->latitude() !== null || $location->longitude() !== null) {
        return;
    }

    DB::table('place_location')
        ->where('id', '=', $location->id())
        ->update([
            'latitude'  => $latitude,
            'longitude' => $longitude,
        ]);
}
474
|
|
|
|
475
|
|
|
/**
 * TNG generates _PLAC records containing LAT/LONG values.
 *
 * The place name is the data on the level 0 line; the coordinates are read from the
 * level 2 LATI and LONG lines and cast to floats. Coordinates are only stored when
 * the location has neither a latitude nor a longitude.
 *
 * @param string $gedcom the reformatted _PLAC record
 */
private function importTNGPlac(string $gedcom): void
{
    if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match) !== 1) {
        return;
    }

    $place_name = $match[1];

    if (preg_match('/\n2 LATI (.+)/', $gedcom, $match) !== 1) {
        return;
    }

    $latitude = (float) $match[1];

    if (preg_match('/\n2 LONG (.+)/', $gedcom, $match) !== 1) {
        return;
    }

    $longitude = (float) $match[1];

    $location = new PlaceLocation($place_name);

    // Leave any coordinates that are already present untouched
    if ($location->latitude() !== null || $location->longitude() !== null) {
        return;
    }

    DB::table('place_location')
        ->where('id', '=', $location->id())
        ->update([
            'latitude'  => $latitude,
            'longitude' => $longitude,
        ]);
}
509
|
|
|
|
510
|
|
|
/**
 * Extract all level 2 places from the given record and insert them into the places table.
 *
 * Every place, and each of its parents, gets a link row for this record.
 *
 * @param string $xref   the record that the places belong to
 * @param Tree   $tree   the tree that the record belongs to
 * @param string $gedrec the reformatted gedcom record
 */
public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
{
    preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);

    // Collect all new rows, so that they can be inserted together
    $rows = [];

    foreach (array_unique($matches[1]) as $place_name) {
        // Calling Place::id() will create the entry in the database, if it doesn't already exist.
        // Walk up the hierarchy until id() returns 0.
        for ($place = new Place($place_name, $tree); $place->id() !== 0; $place = $place->parent()) {
            $rows[] = [
                'pl_p_id' => $place->id(),
                'pl_gid'  => $xref,
                'pl_file' => $tree->id(),
            ];
        }
    }

    // array_unique doesn't work with arrays of arrays, so compare serialized rows instead
    $unique_keys = array_unique(array_map(serialize(...), $rows));
    $rows        = array_intersect_key($rows, $unique_keys);

    // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
    foreach (array_chunk($rows, 20000) as $chunk) {
        DB::table('placelinks')->insert($chunk);
    }
}
545
|
|
|
|
546
|
|
|
/**
 * Extract all the dates from the given record and insert them into the database.
 *
 * Each level 2 DATE found under a level 1 fact produces two rows: one for the
 * minimum date and one for the maximum date. Identical rows are inserted only once.
 *
 * @param string $xref   the record that the dates belong to
 * @param int    $ged_id the ID of the tree that the record belongs to
 * @param string $gedrec the reformatted gedcom record
 */
private function updateDates(string $xref, int $ged_id, string $gedrec): void
{
    // Insert all new rows together
    $rows = [];

    preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);

    foreach ($matches as $match) {
        $fact = $match[1];
        $date = new Date($match[2]);

        // Every column of a row is taken from the same limit. The row for the maximum date
        // previously took "d_type" from the minimum date, which is wrong whenever the two
        // limits of a range use different calendars.
        foreach ([$date->minimumDate(), $date->maximumDate()] as $limit) {
            $rows[] = [
                'd_day'        => $limit->day,
                'd_month'      => $limit->format('%O'),
                'd_mon'        => $limit->month,
                'd_year'       => $limit->year,
                'd_julianday1' => $limit->minimumJulianDay(),
                'd_julianday2' => $limit->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                'd_type'       => $limit->format('%@'),
            ];
        }
    }

    // array_unique doesn't work with arrays of arrays
    $rows = array_intersect_key($rows, array_unique(array_map(serialize(...), $rows)));

    DB::table('dates')->insert(array_values($rows));
}
591
|
|
|
|
592
|
|
|
/**
 * Extract all the links from the given record and insert them into the database.
 *
 * @param string $xref   the record that the links come from
 * @param int    $ged_id the ID of the tree that the record belongs to
 * @param string $gedrec the reformatted gedcom record
 */
private function updateLinks(string $xref, int $ged_id, string $gedrec): void
{
    $link_pattern = '/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/';

    preg_match_all($link_pattern, $gedrec, $matches, PREG_SET_ORDER);

    // Collect all new rows, so that they can be inserted together
    $rows = [];

    foreach ($matches as [, $tag, $target]) {
        // Some applications (e.g. GenoPro) create links longer than 15 characters.
        $link = mb_substr($tag, 0, 15);

        // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@".
        // Rows are keyed on the upper-cased target, so a later duplicate replaces an earlier one.
        $key = $link . strtoupper($target);

        $rows[$key] = [
            'l_from' => $xref,
            'l_to'   => $target,
            'l_type' => $link,
            'l_file' => $ged_id,
        ];
    }

    DB::table('link')->insert(array_values($rows));
}
617
|
|
|
|
618
|
|
|
/**
 * Extract all the names from the given record and insert them into the database.
 *
 * Soundex codes are stored as null for a given name equal to Individual::PRAENOMEN_NESCIO
 * and for a surname ("surn") equal to Individual::NOMEN_NESCIO.
 *
 * @param string     $xref   the individual that the names belong to
 * @param int        $ged_id the ID of the tree that the individual belongs to
 * @param Individual $record the individual whose names are indexed
 */
private function updateNames(string $xref, int $ged_id, Individual $record): void
{
    // Collect all new rows, so that they can be inserted together
    $rows = [];

    foreach ($record->getAllNames() as $n => $name) {
        $givn_unknown = $name['givn'] === Individual::PRAENOMEN_NESCIO;
        $surn_unknown = $name['surn'] === Individual::NOMEN_NESCIO;

        // NOTE(review): the surname test uses 'surn', but the soundex input is 'surname'.
        $rows[] = [
            'n_file'             => $ged_id,
            'n_id'               => $xref,
            'n_num'              => $n,
            'n_type'             => $name['type'],
            'n_sort'             => mb_substr($name['sort'], 0, 255),
            'n_full'             => mb_substr($name['fullNN'], 0, 255),
            'n_surname'          => mb_substr($name['surname'], 0, 255),
            'n_surn'             => mb_substr($name['surn'], 0, 255),
            'n_givn'             => mb_substr($name['givn'], 0, 255),
            'n_soundex_givn_std' => $givn_unknown ? null : Soundex::russell($name['givn']),
            'n_soundex_surn_std' => $surn_unknown ? null : Soundex::russell($name['surname']),
            'n_soundex_givn_dm'  => $givn_unknown ? null : Soundex::daitchMokotoff($name['givn']),
            'n_soundex_surn_dm'  => $surn_unknown ? null : Soundex::daitchMokotoff($name['surname']),
        ];
    }

    DB::table('name')->insert($rows);
}
662
|
|
|
|
663
|
|
|
/**
 * Extract inline media data, and convert to media objects.
 *
 * An inline object is an OBJE line at level 1, 2 or 3 with no data, followed by at
 * least one deeper sub-line. Each one is replaced by a link to a media object.
 *
 * @param Tree   $tree   the tree that the record belongs to
 * @param string $gedcom the reformatted gedcom record
 *
 * @return string the record, with inline objects replaced by "OBJE @XREF@" links
 */
private function convertInlineMedia(Tree $tree, string $gedcom): string
{
    // Deal with all level 1 objects first, then level 2, then level 3
    foreach ([1, 2, 3] as $level) {
        $inline_object = '/\n' . $level . ' OBJE(?:\n[' . ($level + 1) . '-9].+)+/';

        while (preg_match($inline_object, $gedcom, $match)) {
            $xref   = $this->createMediaObject($match[0], $tree);
            $gedcom = strtr($gedcom, [$match[0] => "\n" . $level . ' OBJE @' . $xref . '@']);
        }
    }

    return $gedcom;
}
683
|
|
|
|
684
|
|
|
/**
 * Create a new media object, from inline media data.
 *
 * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL
 * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL
 * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI
 *
 * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE
 * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL
 *
 * If the tree already has a media file with the same title and filename, then that
 * object is reused and nothing is inserted.
 *
 * @param string $gedcom the inline OBJE line and its sub-lines
 * @param Tree   $tree   the tree that the media object belongs to
 *
 * @return string the XREF of the existing or newly created media object
 */
private function createMediaObject(string $gedcom, Tree $tree): string
{
    // The data of the first sub-line with the given tag, or '' if there is none
    $first_value = static function (string $tag) use ($gedcom): string {
        preg_match('/\n\d ' . $tag . ' (.+)/', $gedcom, $found);

        return $found[1] ?? '';
    };

    $file        = $first_value('FILE');
    $title       = $first_value('TITL');
    $format      = $first_value('FORM');
    $media       = $first_value('MEDI');
    $scrapbook   = $first_value('_SCBK');
    $primary     = $first_value('_PRIM');
    $custom_type = $first_value('_TYPE');

    if ($media === '') {
        // Legacy uses _TYPE instead of MEDI
        $media = $custom_type;
        $type  = '';
    } else {
        $type = $custom_type;
    }

    preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches);
    $notes = $matches[1];

    // Have we already created a media object with the same title/filename?
    $xref = DB::table('media_file')
        ->where('m_file', '=', $tree->id())
        ->where('descriptive_title', '=', mb_substr($title, 0, 248))
        ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248))
        ->value('m_id');

    if ($xref !== null) {
        return $xref;
    }

    $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE);

    // convert to a media-object
    $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file;

    // The media type is only written beneath a FORM line
    if ($format !== '') {
        $gedcom .= "\n2 FORM " . $format;
        $gedcom .= $media === '' ? '' : "\n3 TYPE " . $media;
    }

    $gedcom .= $title === '' ? '' : "\n2 TITL " . $title;
    $gedcom .= $scrapbook === '' ? '' : "\n1 _SCBK " . $scrapbook;
    $gedcom .= $primary === '' ? '' : "\n1 _PRIM " . $primary;
    $gedcom .= $type === '' ? '' : "\n1 _TYPE " . $type;

    // Notes become level 1; continuation lines at levels 3-5 become level 2
    foreach ($notes as $note) {
        $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]);
    }

    DB::table('media')->insert([
        'm_id'     => $xref,
        'm_file'   => $tree->id(),
        'm_gedcom' => $gedcom,
    ]);

    DB::table('media_file')->insert([
        'm_id'                 => $xref,
        'm_file'               => $tree->id(),
        'multimedia_file_refn' => mb_substr($file, 0, 248),
        'multimedia_format'    => mb_substr($format, 0, 4),
        'source_media_type'    => mb_substr($media, 0, 15),
        'descriptive_title'    => mb_substr($title, 0, 248),
    ]);

    return $xref;
}
785
|
|
|
|
786
|
|
|
/**
 * Remove a record and its index rows from the database, then optionally import it again.
 *
 * @param string $gedrec the gedcom record; must start with "0 @XREF@ TYPE" or be a HEAD record
 * @param Tree   $tree   the tree that the record belongs to
 * @param bool   $delete true to only delete; false to delete and then re-import $gedrec
 *
 * @throws GedcomErrorException if the record has neither an XREF nor a HEAD tag
 */
public function updateRecord(string $gedrec, Tree $tree, bool $delete): void
{
    $record_pattern = '/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/';

    if (preg_match($record_pattern, $gedrec, $match) === 1) {
        $gid  = $match[1];
        $type = $match[2];
    } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) {
        // The HEAD record has no XREF. Any others?
        $gid  = $match[1];
        $type = $match[1];
    } else {
        throw new GedcomErrorException($gedrec);
    }

    $tree_id = $tree->id();

    // Place links
    DB::table('placelinks')
        ->where('pl_gid', '=', $gid)
        ->where('pl_file', '=', $tree_id)
        ->delete();

    // Orphaned places. If we're deleting "Westminster, London, England",
    // then we may also need to delete "London, England" and "England".
    // Repeat until a pass deletes nothing.
    do {
        $affected = DB::table('places')
            ->leftJoin('placelinks', function (JoinClause $join): void {
                $join
                    ->on('p_id', '=', 'pl_p_id')
                    ->on('p_file', '=', 'pl_file');
            })
            ->whereNull('pl_p_id')
            ->delete();
    } while ($affected > 0);

    // Index tables, as [table, xref column, tree column]
    $index_tables = [
        ['dates', 'd_gid', 'd_file'],
        ['name', 'n_id', 'n_file'],
        ['link', 'l_from', 'l_file'],
    ];

    // The table(s) holding the record itself. Media files are removed before the media object.
    $record_tables = match ($type) {
        Individual::RECORD_TYPE => [['individuals', 'i_id', 'i_file']],
        Family::RECORD_TYPE     => [['families', 'f_id', 'f_file']],
        Source::RECORD_TYPE     => [['sources', 's_id', 's_file']],
        Media::RECORD_TYPE      => [['media_file', 'm_id', 'm_file'], ['media', 'm_id', 'm_file']],
        default                 => [['other', 'o_id', 'o_file']],
    };

    foreach ([...$index_tables, ...$record_tables] as [$table, $xref_column, $tree_column]) {
        DB::table($table)
            ->where($xref_column, '=', $gid)
            ->where($tree_column, '=', $tree_id)
            ->delete();
    }

    if (!$delete) {
        $this->importRecord($gedrec, $tree, true);
    }
}
878
|
|
|
} |
879
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
, you can move it to the dependency path list as follows: For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths.