fisharebest /
webtrees
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * webtrees: online genealogy |
||
| 5 | * Copyright (C) 2025 webtrees development team |
||
| 6 | * This program is free software: you can redistribute it and/or modify |
||
| 7 | * it under the terms of the GNU General Public License as published by |
||
| 8 | * the Free Software Foundation, either version 3 of the License, or |
||
| 9 | * (at your option) any later version. |
||
| 10 | * This program is distributed in the hope that it will be useful, |
||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 13 | * GNU General Public License for more details. |
||
| 14 | * You should have received a copy of the GNU General Public License |
||
| 15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
||
| 16 | */ |
||
| 17 | |||
| 18 | declare(strict_types=1); |
||
| 19 | |||
| 20 | namespace Fisharebest\Webtrees\Services; |
||
| 21 | |||
| 22 | use Fisharebest\Webtrees\Auth; |
||
| 23 | use Fisharebest\Webtrees\DB; |
||
| 24 | use Fisharebest\Webtrees\Encodings\UTF16BE; |
||
| 25 | use Fisharebest\Webtrees\Encodings\UTF16LE; |
||
| 26 | use Fisharebest\Webtrees\Encodings\UTF8; |
||
| 27 | use Fisharebest\Webtrees\Encodings\Windows1252; |
||
| 28 | use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory; |
||
| 29 | use Fisharebest\Webtrees\Gedcom; |
||
| 30 | use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter; |
||
| 31 | use Fisharebest\Webtrees\GedcomRecord; |
||
| 32 | use Fisharebest\Webtrees\Header; |
||
| 33 | use Fisharebest\Webtrees\Registry; |
||
| 34 | use Fisharebest\Webtrees\Site; |
||
| 35 | use Fisharebest\Webtrees\Tree; |
||
| 36 | use Fisharebest\Webtrees\Webtrees; |
||
| 37 | use Illuminate\Database\Query\Builder; |
||
| 38 | use Illuminate\Database\Query\Expression; |
||
| 39 | use Illuminate\Support\Collection; |
||
| 40 | use League\Flysystem\Filesystem; |
||
| 41 | use League\Flysystem\FilesystemOperator; |
||
| 42 | use Psr\Http\Message\ResponseFactoryInterface; |
||
| 43 | use Psr\Http\Message\ResponseInterface; |
||
| 44 | use Psr\Http\Message\StreamFactoryInterface; |
||
| 45 | use RuntimeException; |
||
| 46 | use ZipArchive; |
||
| 47 | |||
| 48 | use function addcslashes; |
||
| 49 | use function date; |
||
| 50 | use function explode; |
||
| 51 | use function fclose; |
||
| 52 | use function fopen; |
||
| 53 | use function fwrite; |
||
| 54 | use function is_string; |
||
| 55 | use function pathinfo; |
||
| 56 | use function preg_match_all; |
||
| 57 | use function rewind; |
||
| 58 | use function stream_filter_append; |
||
| 59 | use function stream_get_meta_data; |
||
| 60 | use function strlen; |
||
| 61 | use function strpos; |
||
| 62 | use function strtolower; |
||
| 63 | use function strtoupper; |
||
| 64 | use function tmpfile; |
||
| 65 | |||
| 66 | use const PATHINFO_EXTENSION; |
||
| 67 | use const PREG_SET_ORDER; |
||
| 68 | use const STREAM_FILTER_WRITE; |
||
| 69 | |||
/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /** Maps the role names accepted by downloadResponse() to Auth::PRIV_* access levels. */
    private const array ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    /**
     * @param ResponseFactoryInterface $response_factory Creates the download response
     * @param StreamFactoryInterface   $stream_factory   Wraps the exported data as a response body
     */
    public function __construct(
        private readonly ResponseFactoryInterface $response_factory,
        private readonly StreamFactoryInterface $stream_factory,
    ) {
    }

    /**
     * Export a tree and wrap the result in a "download" HTTP response.
     *
     * @param Tree                                            $tree         Export data from this tree
     * @param bool                                            $sort_by_xref Write GEDCOM records in XREF order
     * @param string                                          $encoding     Convert from UTF-8 to other encoding
     * @param string                                          $privacy      Filter records by role (a key of ACCESS_LEVELS)
     * @param string                                          $line_endings CRLF or LF
     * @param string                                          $filename     Name of download file, without an extension
     * @param string                                          $format       One of: gedcom, zip, zipmedia, gedzip
     * @param Collection<int,string|object|GedcomRecord>|null $records
     *
     * @throws RuntimeException If the role is unknown or the temporary ZIP file cannot be created
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        Collection|null $records = null
    ): ResponseInterface {
        // Fail early, with a meaningful message, when the role is not one we know about.
        $access_level = self::ACCESS_LEVELS[$privacy]
            ?? throw new RuntimeException('Unknown privacy level: ' . $privacy);

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Create a new/empty .ZIP file.
        // Only the path of the temporary file is kept; the handle itself is not retained.
        $temp_zip_file  = stream_get_meta_data(tmpfile())['uri'];
        $zip_filesystem = new ZipArchive();

        // ZipArchive::open() returns true on success and an error code otherwise.
        if ($zip_filesystem->open($temp_zip_file, ZipArchive::CREATE | ZipArchive::OVERWRITE) !== true) {
            throw new RuntimeException('Unable to create temporary ZIP file');
        }

        // A null media path tells export() not to add media files to the archive.
        $media_path = match ($format) {
            'zipmedia' => $tree->getPreference('MEDIA_DIRECTORY'),
            'gedzip'   => '',
            default    => null,
        };

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        if ($format === 'gedzip') {
            $gedcom_name = 'gedcom.ged';
            $extension   = '.gdz';
        } else {
            $gedcom_name = $filename . '.ged';
            $extension   = '.zip';
        }

        $zip_filesystem->addFromString($gedcom_name, stream_get_contents($resource));

        fclose($resource);

        $zip_filesystem->close();

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }

    /**
     * Write GEDCOM data to a stream.
     *
     * The data is written to an in-memory stream, which is rewound before it is returned.
     * When $media_path is not null, the files named on the "1 FILE" lines of media (OBJE)
     * records are also copied from the tree's media filesystem into $zip_filesystem.
     *
     * @param Tree                                            $tree           Export data from this tree
     * @param bool                                            $sort_by_xref   Write GEDCOM records in XREF order
     * @param string                                          $encoding       Convert from UTF-8 to other encoding
     * @param int                                             $access_level   Apply privacy filtering
     * @param string                                          $line_endings   CRLF or LF
     * @param Collection<int,string|object|GedcomRecord>|null $records        Just export these records
     * @param ZipArchive|FilesystemOperator|null              $zip_filesystem Write media files to this filesystem
     * @param string|null                                     $media_path     Location within the zip filesystem
     *
     * @return resource
     *
     * @throws RuntimeException If the temporary stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        Collection|null $records = null,
        ZipArchive|FilesystemOperator|null $zip_filesystem = null,
        string|null $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        // Everything written to the stream is converted from UTF-8 to the requested encoding.
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The raw database rows are written as they are stored, so there is
            // no need to create GEDCOM record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static fn (): Collection => new Collection());

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = $tree->mediaFilesystem();

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);

                    // Nothing of this record is left at this access level.
                    if ($gedcom === '') {
                        continue;
                    }
                } else {
                    // A database row (or similar object): use whichever *_gedcom property it has.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== null && preg_match('/^0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if (!$media_filesystem->fileExists($media_file)) {
                            continue;
                        }

                        if ($zip_filesystem instanceof FilesystemOperator) {
                            // Accept every implementation allowed by the signature, not just Filesystem.
                            $source = $media_filesystem->readStream($media_file);

                            try {
                                $zip_filesystem->writeStream($media_path . $media_file, $source);
                            } finally {
                                // Do not keep one open handle per media file until the end of the request.
                                if (is_resource($source)) {
                                    fclose($source);
                                }
                            }
                        } elseif ($zip_filesystem instanceof ZipArchive) {
                            $zip_filesystem->addFromString($media_path . $media_file, $media_filesystem->read($media_file));
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Build a new GEDCOM header (HEAD) record for an export.
     *
     * @param Tree   $tree        The tree being exported; its name becomes the FILE value
     * @param string $encoding    Encoding of the export; written as the CHAR value
     * @param bool   $include_sub Also copy SUBM and SUBN facts from the existing header
     *
     * @return string The header record, with lines separated by "\n" and no trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Some encodings are written to the CHAR line under a different name.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        // There should always be a header record.
        if ($header instanceof Header) {
            foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }

            if ($include_sub) {
                foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                    $gedcom .= "\n" . $fact->gedcom();
                }
            }
        }

        return $gedcom;
    }

    /**
     * Split over-long GEDCOM lines into a first line followed by CONC continuation lines.
     *
     * A continuation of an ordinary line is written one level below that line.  A continuation
     * of a line that is itself a CONC or CONT line stays at the same level, as all continuation
     * lines are subordinate to the same parent line.
     *
     * @param string $gedcom          One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length Maximum number of characters (not bytes) per line
     *
     * @return string The same data, with long lines split, separated by "\n"
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                $parts = explode(' ', $line, 3);
                $level = (int) $parts[0];
                // For a record line ("0 @X1@ NOTE ..."), this is the XREF rather than the tag.
                $tag = $parts[1] ?? '';

                if ($tag !== 'CONC' && $tag !== 'CONT') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Select the GEDCOM data and XREF of every family in a tree.
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('families', 'f', $tree, $sort_by_xref);
    }

    /**
     * Select the GEDCOM data and XREF of every individual in a tree.
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('individuals', 'i', $tree, $sort_by_xref);
    }

    /**
     * Select the GEDCOM data and XREF of every source in a tree.
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('sources', 's', $tree, $sort_by_xref);
    }

    /**
     * Select the GEDCOM data and XREF of every media object in a tree.
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('media', 'm', $tree, $sort_by_xref);
    }

    /**
     * Select the GEDCOM data and XREF of every other record in a tree, except header and trailer.
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            // Keep records of the same type together, then sort each type by XREF.
            $query->orderBy('o_type');
            $this->orderByXref($query, 'o_id');
        }

        return $query;
    }

    /**
     * Build the query shared by the record tables, whose columns are named
     * <prefix>_file, <prefix>_gedcom and <prefix>_id.
     *
     * @param string $table  Table name - an internal constant, never user input
     * @param string $prefix Column prefix - an internal constant, never user input
     */
    private function recordQuery(string $table, string $prefix, Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table($table)
            ->where($prefix . '_file', '=', $tree->id())
            ->select([$prefix . '_gedcom', $prefix . '_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, $prefix . '_id');
        }

        return $query;
    }

    /**
     * Sort by the length of the XREF and then by the XREF itself, so that "X2" comes before "X10".
     *
     * @param string $column XREF column - an internal constant, never user input
     */
    private function orderByXref(Builder $query, string $column): Builder
    {
        return $query
            ->orderBy(new Expression('LENGTH(' . $column . ')'))
            ->orderBy($column);
    }
}
||
| 444 |