Issues (2491)

app/Services/GedcomExportService.php (1 issue)

Labels
Severity
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2025 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Services;
21
22
use Fisharebest\Webtrees\Auth;
23
use Fisharebest\Webtrees\DB;
24
use Fisharebest\Webtrees\Encodings\UTF16BE;
25
use Fisharebest\Webtrees\Encodings\UTF16LE;
26
use Fisharebest\Webtrees\Encodings\UTF8;
27
use Fisharebest\Webtrees\Encodings\Windows1252;
28
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
29
use Fisharebest\Webtrees\Gedcom;
30
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
31
use Fisharebest\Webtrees\GedcomRecord;
32
use Fisharebest\Webtrees\Header;
33
use Fisharebest\Webtrees\Registry;
34
use Fisharebest\Webtrees\Site;
35
use Fisharebest\Webtrees\Tree;
36
use Fisharebest\Webtrees\Webtrees;
37
use Illuminate\Database\Query\Builder;
38
use Illuminate\Database\Query\Expression;
39
use Illuminate\Support\Collection;
40
use League\Flysystem\Filesystem;
41
use League\Flysystem\FilesystemOperator;
42
use Psr\Http\Message\ResponseFactoryInterface;
43
use Psr\Http\Message\ResponseInterface;
44
use Psr\Http\Message\StreamFactoryInterface;
45
use RuntimeException;
46
use ZipArchive;
47
48
use function addcslashes;
49
use function date;
50
use function explode;
51
use function fclose;
52
use function fopen;
53
use function fwrite;
54
use function is_string;
55
use function pathinfo;
56
use function preg_match_all;
57
use function rewind;
58
use function stream_filter_append;
59
use function stream_get_meta_data;
60
use function strlen;
61
use function strpos;
62
use function strtolower;
63
use function strtoupper;
64
use function tmpfile;
65
66
use const PATHINFO_EXTENSION;
67
use const PREG_SET_ORDER;
68
use const STREAM_FILTER_WRITE;
69
70
/**
71
 * Export data in GEDCOM format
72
 */
73
class GedcomExportService
74
{
75
    /**
     * Privacy role names, as accepted by downloadResponse(), mapped to the
     * access level that is passed to export().
     */
    private const array ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];
81
82
    /**
     * @param ResponseFactoryInterface $response_factory Used to create the download responses
     * @param StreamFactoryInterface   $stream_factory   Used to create the response bodies
     */
    public function __construct(
        private readonly ResponseFactoryInterface $response_factory,
        private readonly StreamFactoryInterface $stream_factory,
    ) {
    }
87
88
    /**
     * Create an HTTP download response containing a GEDCOM export.
     *
     * For the "gedcom" format, the response body is the GEDCOM data itself.
     * For every other format, a temporary .ZIP archive is created containing
     * the GEDCOM data and, for "zipmedia" and "gedzip", the media files that
     * export() copies into it.
     *
     * @param Tree                                            $tree         Export data from this tree
     * @param bool                                            $sort_by_xref Write GEDCOM records in XREF order
     * @param string                                          $encoding     Convert from UTF-8 to other encoding
     * @param string                                          $privacy      Filter records by role
     * @param string                                          $line_endings CRLF or LF
     * @param string                                          $filename     Name of download file, without an extension
     * @param string                                          $format       One of: gedcom, zip, zipmedia, gedzip
     * @param Collection<int,string|object|GedcomRecord>|null $records
     *
     * @throws RuntimeException If the temporary archive cannot be created, filled or saved
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        Collection|null $records = null
    ): ResponseInterface {
        $access_level = self::ACCESS_LEVELS[$privacy];

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Reserve a unique temporary filename. Closing the handle removes the empty
        // file again, which leaves the path free for ZipArchive to create the archive.
        $temp_handle = tmpfile();

        if ($temp_handle === false) {
            throw new RuntimeException('Failed to create temporary file');
        }

        $temp_zip_file = stream_get_meta_data($temp_handle)['uri'];
        fclose($temp_handle);

        // Create a new/empty .ZIP file
        $zip_filesystem = new ZipArchive();

        // ZipArchive::open() returns true on success, or an integer error code.
        if ($zip_filesystem->open($temp_zip_file, ZipArchive::CREATE | ZipArchive::OVERWRITE) !== true) {
            throw new RuntimeException('Failed to create temporary ZIP archive');
        }

        // A null media path tells export() not to add media files.
        $media_path = match ($format) {
            'zipmedia' => $tree->getPreference('MEDIA_DIRECTORY'),
            'gedzip'   => '',
            default    => null,
        };

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        $gedcom = stream_get_contents($resource);

        fclose($resource);

        if ($gedcom === false) {
            throw new RuntimeException('Failed to read exported GEDCOM data');
        }

        if ($format === 'gedzip') {
            $zip_filesystem->addFromString('gedcom.ged', $gedcom);
            $extension = '.gdz';
        } else {
            $zip_filesystem->addFromString($filename . '.ged', $gedcom);
            $extension = '.zip';
        }

        // The archive is only written to disk when it is closed.
        if ($zip_filesystem->close() === false) {
            throw new RuntimeException('Failed to write temporary ZIP archive');
        }

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }
155
156
    /**
     * Write GEDCOM data to a stream.
     *
     * Records are written as UTF-8 to an in-memory stream; a write filter attached
     * to that stream converts them to the requested encoding. The stream is
     * rewound before it is returned.
     *
     * When a media path is given, the files named in "1 FILE" lines of exported
     * media objects are copied from the tree's media filesystem to $zip_filesystem.
     *
     * @param Tree                                            $tree           Export data from this tree
     * @param bool                                            $sort_by_xref   Write GEDCOM records in XREF order
     * @param string                                          $encoding       Convert from UTF-8 to other encoding
     * @param int                                             $access_level   Apply privacy filtering
     * @param string                                          $line_endings   CRLF or LF
     * @param Collection<int,string|object|GedcomRecord>|null $records        Just export these records
     * @param ZipArchive|FilesystemOperator|null              $zip_filesystem Write media files to this filesystem
     * @param string|null                                     $media_path     Location within the zip filesystem, or null to skip media files
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written to or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        Collection|null $records = null,
        ZipArchive|FilesystemOperator|null $zip_filesystem = null,
        string|null $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The database rows are written as they are, without calling privatizeGedcom(),
            // so we can stream them with a cursor instead of creating GEDCOM record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static fn (): Collection => new Collection());

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = $tree->mediaFilesystem();

        // Matches the first line of a media object record.
        $media_record_pattern = '/^0 @' . Gedcom::REGEX_XREF . '@ OBJE/';

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);

                    // Nothing is left of this record at this access level.
                    if ($gedcom === '') {
                        continue;
                    }
                } else {
                    // A database row. Only the column of the queried table is set.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== null && preg_match($media_record_pattern, $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if (!$media_filesystem->fileExists($media_file)) {
                            continue;
                        }

                        // Accept every FilesystemOperator allowed by the parameter type,
                        // not only the concrete Filesystem class.
                        if ($zip_filesystem instanceof FilesystemOperator) {
                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
                        }

                        if ($zip_filesystem instanceof ZipArchive) {
                            $zip_filesystem->addFromString($media_path . $media_file, $media_filesystem->read($media_file));
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }
284
285
    /**
     * Build the GEDCOM header (HEAD) record for an export.
     *
     * @param Tree   $tree        The tree being exported; its name becomes the FILE value
     * @param string $encoding    Encoding of the export, written to the CHAR line
     * @param bool   $include_sub Also copy the SUBM and SUBN facts from the tree's header
     *
     * @return string The header record, with lines separated by "\n" and no trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $export_name = $tree->name();
        $extension   = strtolower(pathinfo($export_name, PATHINFO_EXTENSION));

        if ($extension !== 'ged') {
            $export_name .= '.ged';
        }

        // Some encodings are written to the CHAR line under a different name.
        $char = match ($encoding) {
            UTF16BE::NAME, UTF16LE::NAME => 'UNICODE',
            Windows1252::NAME            => 'ANSI',
            default                      => $encoding,
        };

        // Build a new header record
        $lines = [
            '0 HEAD',
            '1 SOUR ' . Webtrees::NAME,
            '2 NAME ' . Webtrees::NAME,
            '2 VERS ' . Webtrees::VERSION,
            '1 DEST DISKETTE',
            '1 DATE ' . strtoupper(date('d M Y')),
            '2 TIME ' . date('H:i:s'),
            '1 GEDC',
            '2 VERS 5.5.1',
            '2 FORM LINEAGE-LINKED',
            '1 CHAR ' . $char,
            '1 FILE ' . $export_name,
        ];

        // Preserve some values from the original header
        $original = Registry::headerFactory()->make('HEAD', $tree);
        $original ??= Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        // There should always be a header record.
        if ($original instanceof Header) {
            $tag_groups = [['COPR', 'LANG', 'PLAC', 'NOTE']];

            if ($include_sub) {
                $tag_groups[] = ['SUBM', 'SUBN'];
            }

            // Each group is fetched separately, so that the groups keep their order.
            foreach ($tag_groups as $tags) {
                foreach ($original->facts($tags) as $fact) {
                    $lines[] = $fact->gedcom();
                }
            }
        }

        return implode("\n", $lines);
    }
332
333
    /**
     * Split GEDCOM lines that are too long, continuing them on CONC lines.
     *
     * The total length of a GEDCOM line, including level number, cross-reference number,
     * tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
     *
     * @param string $gedcom          One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length Maximum number of characters in a line
     *
     * @return string The same data, with over-long lines split
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            if (mb_strlen($line) > $max_line_length) {
                $parts = explode(' ', $line, 3);
                $level = $parts[0];
                // A line without any space has no tag.
                $tag   = $parts[1] ?? '';

                // Continuations go one level below the line that they continue. CONT and
                // CONC lines are continuations themselves, so further pieces are their siblings.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }
366
367
    /**
     * Query the GEDCOM data and XREF of every family in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order by XREF length and then by XREF, so that X2 comes before X10
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $families = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if (!$sort_by_xref) {
            return $families;
        }

        return $families
            ->orderBy(new Expression('LENGTH(f_id)'))
            ->orderBy('f_id');
    }
381
382
    /**
     * Query the GEDCOM data and XREF of every individual in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order by XREF length and then by XREF, so that X2 comes before X10
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $individuals = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if (!$sort_by_xref) {
            return $individuals;
        }

        return $individuals
            ->orderBy(new Expression('LENGTH(i_id)'))
            ->orderBy('i_id');
    }
396
397
    /**
     * Query the GEDCOM data and XREF of every source in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order by XREF length and then by XREF, so that X2 comes before X10
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $sources = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if (!$sort_by_xref) {
            return $sources;
        }

        return $sources
            ->orderBy(new Expression('LENGTH(s_id)'))
            ->orderBy('s_id');
    }
411
412
    /**
     * Query the GEDCOM data and XREF of every media object in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order by XREF length and then by XREF, so that X2 comes before X10
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $media = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if (!$sort_by_xref) {
            return $media;
        }

        return $media
            ->orderBy(new Expression('LENGTH(m_id)'))
            ->orderBy('m_id');
    }
426
427
    /**
     * Query the GEDCOM data and XREF of every record in the "other" table of a tree,
     * except for header and trailer records.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order by record type, then by XREF length and XREF, so that X2 comes before X10
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $others = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if (!$sort_by_xref) {
            return $others;
        }

        return $others
            ->orderBy('o_type')
            ->orderBy(new Expression('LENGTH(o_id)'))
            ->orderBy('o_id');
    }
443
}
444