| Total Complexity | 124 |
| Total Lines | 809 |
| Duplicated Lines | 0 % |
| Changes | 3 | ||
| Bugs | 0 | Features | 0 |
Complex classes like GedcomImportService often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use GedcomImportService, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 70 | class GedcomImportService |
||
| 71 | { |
||
| 72 | /** |
||
| 73 | * Tidy up a gedcom record on import, so that we can access it consistently/efficiently. |
||
| 74 | */ |
||
| 75 | private function reformatRecord(string $rec, Tree $tree): string |
||
| 218 | } |
||
| 219 | |||
/**
 * Import a single GEDCOM record into the database.
 *
 * The record is normalised, classified from its level-0 line, optionally
 * extended (a generated _UID, media links that existed before the import,
 * inline media converted to media objects) and then inserted into the table
 * that matches its record type. The link index is refreshed last.
 *
 * Some records are never stored: MacFamilyTree records with duplicate XREFs
 * and known custom records without XREFs are skipped, and the _PLAC_DEFN /
 * _PLAC records are only used to fill in missing place coordinates.
 *
 * @param string $gedrec the raw gedcom record to parse
 * @param Tree   $tree   import the record into this tree
 * @param bool   $update whether this is an updated record that has been accepted
 *
 * @throws GedcomErrorException if the level-0 line is not a recognised record
 */
public function importRecord(string $gedrec, Tree $tree, bool $update): void
{
    $tree_id = $tree->id();

    // Escaped @ signs (only if importing from file)
    if (!$update) {
        $gedrec = str_replace('@@', '@', $gedrec);
    }

    // Standardise gedcom format
    $gedrec = $this->reformatRecord($gedrec, $tree);

    // Work out the record type and XREF from the level-0 line.
    if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
        [, $xref, $type] = $match;
    } elseif (str_starts_with($gedrec, '0 HEAD')) {
        $type = 'HEAD';
        $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF.
    } elseif (str_starts_with($gedrec, '0 TRLR')) {
        // The trailer is the last record of a file, so flag the tree as imported.
        $tree->setPreference('imported', '1');
        $type = 'TRLR';
        $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF.
    } elseif (preg_match('/^0 (_PTF|_PTE|_STF|_STE|_PLAC|_PEG|LABL) @/', $gedrec) === 1) {
        // MacFamilyTree creates these records with duplicate XREFs. We can't import these. See #5125
        return;
    } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) {
        $this->importLegacyPlacDefn($gedrec);

        return;
    } elseif (str_starts_with($gedrec, '0 _PLAC ')) {
        $this->importTNGPlac($gedrec);

        return;
    } else {
        // Known custom records without XREFs are silently ignored.
        foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) {
            if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) {
                return;
            }
        }

        throw new GedcomErrorException($gedrec);
    }

    // Add a _UID
    if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
        $gedrec .= "\n1 _UID " . Registry::idFactory()->pafUid();
    }

    // If the user has downloaded their GEDCOM data (containing media objects) and edited it
    // using an application which does not support (and deletes) media objects, then add them
    // back in.
    if ($tree->getPreference('keep_media') === '1') {
        $old_linked_media = DB::table('link')
            ->where('l_from', '=', $xref)
            ->where('l_file', '=', $tree_id)
            ->where('l_type', '=', 'OBJE')
            ->pluck('l_to');

        // Delete these links - so that we do not insert them again in updateLinks()
        DB::table('link')
            ->where('l_from', '=', $xref)
            ->where('l_file', '=', $tree_id)
            ->where('l_type', '=', 'OBJE')
            ->delete();

        foreach ($old_linked_media as $media_id) {
            // Only restore links that the record has actually lost. Re-adding a link
            // that is still present (at any level) would duplicate the OBJE line.
            $still_linked = preg_match(
                '/\n\d+ OBJE @' . preg_quote((string) $media_id, '/') . '@/',
                $gedrec
            ) === 1;

            if (!$still_linked) {
                $gedrec .= "\n1 OBJE @" . $media_id . '@';
            }
        }
    }

    // Convert inline media into media objects
    $gedrec = $this->convertInlineMedia($tree, $gedrec);

    switch ($type) {
        case Individual::RECORD_TYPE:
            $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

            // Fall back to the XREF when the record has no RIN of its own.
            $rin = preg_match('/\n1 RIN (.+)/', $gedrec, $match) === 1 ? $match[1] : $xref;

            // The database can only store MFU, and many of the stats queries assume this.
            $sex = $record->sex();
            $sex = $sex === 'M' || $sex === 'F' ? $sex : 'U';

            DB::table('individuals')->insert([
                'i_id'     => $xref,
                'i_file'   => $tree_id,
                'i_rin'    => $rin,
                'i_sex'    => $sex,
                'i_gedcom' => $gedrec,
            ]);

            // Update the cross-reference/index tables.
            $this->updatePlaces($xref, $tree, $gedrec);
            $this->updateDates($xref, $tree_id, $gedrec);
            $this->updateNames($xref, $tree_id, $record);
            break;

        case Family::RECORD_TYPE:
            $husb = preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match) === 1 ? $match[1] : '';
            $wife = preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match) === 1 ? $match[1] : '';

            // The number of children is the larger of the CHIL links found and any
            // explicit NCHI value. Both sides are integers, so an integer is stored.
            $nchi = (int) preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
            if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match) === 1) {
                $nchi = max($nchi, (int) $match[1]);
            }

            DB::table('families')->insert([
                'f_id'      => $xref,
                'f_file'    => $tree_id,
                'f_husb'    => $husb,
                'f_wife'    => $wife,
                'f_gedcom'  => $gedrec,
                'f_numchil' => $nchi,
            ]);

            // Update the cross-reference/index tables.
            $this->updatePlaces($xref, $tree, $gedrec);
            $this->updateDates($xref, $tree_id, $gedrec);
            break;

        case Source::RECORD_TYPE:
            // Prefer the title, then the abbreviation, then the XREF as the source name.
            if (preg_match('/\n1 TITL (.+)/', $gedrec, $match) === 1) {
                $name = $match[1];
            } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match) === 1) {
                $name = $match[1];
            } else {
                $name = $xref;
            }

            DB::table('sources')->insert([
                's_id'     => $xref,
                's_file'   => $tree_id,
                's_name'   => mb_substr($name, 0, 255),
                's_gedcom' => $gedrec,
            ]);
            break;

        case Repository::RECORD_TYPE:
        case Note::RECORD_TYPE:
        case Submission::RECORD_TYPE:
        case Submitter::RECORD_TYPE:
        case Location::RECORD_TYPE:
            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => $type,
                'o_gedcom' => $gedrec,
            ]);
            break;

        case Header::RECORD_TYPE:
            // Force HEAD records to have a creation date.
            if (!str_contains($gedrec, "\n1 DATE ")) {
                $today  = strtoupper(date('d M Y'));
                $gedrec .= "\n1 DATE " . $today;
            }

            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => Header::RECORD_TYPE,
                'o_gedcom' => $gedrec,
            ]);
            break;

        case Media::RECORD_TYPE:
            $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

            DB::table('media')->insert([
                'm_id'     => $xref,
                'm_file'   => $tree_id,
                'm_gedcom' => $gedrec,
            ]);

            // One media_file row per file attached to the media object; each text
            // value is truncated before it is inserted.
            foreach ($record->mediaFiles() as $media_file) {
                DB::table('media_file')->insert([
                    'm_id'                 => $xref,
                    'm_file'               => $tree_id,
                    'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                    'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                    'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                    'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                ]);
            }
            break;

        default: // Custom record types.
            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => mb_substr($type, 0, 15),
                'o_gedcom' => $gedrec,
            ]);
            break;
    }

    // Update the cross-reference/index tables.
    $this->updateLinks($xref, $tree_id, $gedrec);
}
||
| 440 | |||
/**
 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values.
 *
 * The place name is taken from the level-1 PLAC line and the coordinates from the
 * level-3 LATI/LONG lines. Nothing happens unless all three are present, and a
 * location that already has a latitude or a longitude is left untouched.
 */
private function importLegacyPlacDefn(string $gedcom): void
{
    $coordinate_reader = new GedcomService();

    if (!preg_match('/\n1 PLAC (.+)/', $gedcom, $place_match)) {
        return;
    }

    if (!preg_match('/\n3 LATI ([NS].+)/', $gedcom, $latitude_match)) {
        return;
    }

    $latitude = $coordinate_reader->readLatitude($latitude_match[1]);

    if (!preg_match('/\n3 LONG ([EW].+)/', $gedcom, $longitude_match)) {
        return;
    }

    $longitude = $coordinate_reader->readLongitude($longitude_match[1]);

    $location = new PlaceLocation($place_match[1]);

    // Never overwrite coordinates that are already known.
    if ($location->latitude() !== null || $location->longitude() !== null) {
        return;
    }

    $coordinates = [
        'latitude'  => $latitude,
        'longitude' => $longitude,
    ];

    DB::table('place_location')
        ->where('id', '=', $location->id())
        ->update($coordinates);
}
||
| 477 | |||
/**
 * TNG generates _PLAC records containing LAT/LONG values.
 *
 * The place name comes from the level-0 line and the coordinates from the
 * level-2 LATI/LONG lines, which are cast to floats. Nothing happens unless all
 * three are present, and a location that already has a latitude or a longitude
 * is left untouched.
 */
private function importTNGPlac(string $gedcom): void
{
    if (!preg_match('/^0 _PLAC (.+)/', $gedcom, $place_match)) {
        return;
    }

    if (!preg_match('/\n2 LATI (.+)/', $gedcom, $latitude_match)) {
        return;
    }

    if (!preg_match('/\n2 LONG (.+)/', $gedcom, $longitude_match)) {
        return;
    }

    $coordinates = [
        'latitude'  => (float) $latitude_match[1],
        'longitude' => (float) $longitude_match[1],
    ];

    $location = new PlaceLocation($place_match[1]);

    // Never overwrite coordinates that are already known.
    if ($location->latitude() !== null || $location->longitude() !== null) {
        return;
    }

    DB::table('place_location')
        ->where('id', '=', $location->id())
        ->update($coordinates);
}
||
| 512 | |||
/**
 * Extract all level 2 places from the given record and insert them into the places table.
 *
 * Each distinct place is linked to the record together with every place above it
 * in the hierarchy. Duplicate links are removed before the rows are inserted in
 * batches.
 */
public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
{
    preg_match_all('/\n2 PLAC (.+)/', $gedrec, $found);

    // Collect every link first, so that they can be inserted together.
    $links = [];

    foreach (array_unique($found[1]) as $name) {
        // Calling Place::id() will create the entry in the database, if it doesn't already exist.
        // Walk up through the parents until a place with ID 0 is reached.
        for ($current = new Place($name, $tree); $current->id() !== 0; $current = $current->parent()) {
            $links[] = [
                'pl_p_id' => $current->id(),
                'pl_gid'  => $xref,
                'pl_file' => $tree->id(),
            ];
        }
    }

    // array_unique doesn't work with arrays of arrays, so compare serialized copies
    // and keep only the rows whose keys survive.
    $serialized = array_map(serialize(...), $links);
    $distinct   = array_unique($serialized);
    $links      = array_intersect_key($links, $distinct);

    // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
    $batches = array_chunk($links, 20000);

    foreach ($batches as $batch) {
        DB::table('placelinks')->insert($batch);
    }
}
||
| 548 | |||
| 549 | /** |
||
| 550 | * Extract all the dates from the given record and insert them into the database. |
||
| 551 | */ |
||
| 552 | private function updateDates(string $xref, int $ged_id, string $gedrec): void |
||
| 593 | } |
||
| 594 | |||
| 595 | /** |
||
| 596 | * Extract all the links from the given record and insert them into the database |
||
| 597 | */ |
||
| 598 | private function updateLinks(string $xref, int $ged_id, string $gedrec): void |
||
| 619 | } |
||
| 620 | |||
| 621 | /** |
||
| 622 | * Extract all the names from the given record and insert them into the database. |
||
| 623 | */ |
||
| 624 | private function updateNames(string $xref, int $ged_id, Individual $record): void |
||
| 664 | } |
||
| 665 | |||
| 666 | /** |
||
| 667 | * Extract inline media data, and convert to media objects. |
||
| 668 | */ |
||
| 669 | private function convertInlineMedia(Tree $tree, string $gedcom): string |
||
| 685 | } |
||
| 686 | |||
| 687 | /** |
||
| 688 | * Create a new media object, from inline media data. |
||
| 689 | * |
||
| 690 | * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL |
||
| 691 | * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL |
||
| 692 | * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI |
||
| 693 | * |
||
| 694 | * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE |
||
| 695 | * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL |
||
| 696 | */ |
||
| 697 | private function createMediaObject(string $gedcom, Tree $tree): string |
||
| 787 | } |
||
| 788 | |||
| 789 | public function updateRecord(string $gedrec, Tree $tree, bool $delete): void |
||
| 879 | } |
||
| 880 | } |
||
| 881 | } |
||
| 882 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list instead. For further information, see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths