Passed
Push — develop ( 72ec09...1c079c )
by Greg
15:28 queued 05:03
created

GedcomExportService::sourceQuery()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 8
nc 2
nop 2
dl 0
loc 13
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Services;
21
22
use Fisharebest\Webtrees\Auth;
23
use Fisharebest\Webtrees\Encodings\UTF16BE;
24
use Fisharebest\Webtrees\Encodings\UTF16LE;
25
use Fisharebest\Webtrees\Encodings\UTF8;
26
use Fisharebest\Webtrees\Encodings\Windows1252;
27
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
28
use Fisharebest\Webtrees\Gedcom;
29
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
30
use Fisharebest\Webtrees\GedcomRecord;
31
use Fisharebest\Webtrees\Header;
32
use Fisharebest\Webtrees\Registry;
33
use Fisharebest\Webtrees\Tree;
34
use Fisharebest\Webtrees\Webtrees;
35
use Illuminate\Database\Capsule\Manager as DB;
36
use Illuminate\Database\Query\Builder;
37
use Illuminate\Database\Query\Expression;
38
use Illuminate\Support\Collection;
39
use League\Flysystem\Filesystem;
40
use League\Flysystem\FilesystemOperator;
41
use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
42
use League\Flysystem\ZipArchive\ZipArchiveAdapter;
43
use Psr\Http\Message\ResponseFactoryInterface;
44
use Psr\Http\Message\ResponseInterface;
45
use Psr\Http\Message\StreamFactoryInterface;
46
use RuntimeException;
47
48
use function addcslashes;
49
use function date;
50
use function explode;
51
use function fclose;
52
use function fopen;
53
use function fwrite;
54
use function pathinfo;
55
use function preg_match_all;
56
use function rewind;
57
use function str_contains;
58
use function stream_filter_append;
59
use function stream_get_meta_data;
60
use function strlen;
61
use function strpos;
62
use function strtolower;
63
use function strtoupper;
64
use function tmpfile;
65
66
use const PATHINFO_EXTENSION;
67
use const PREG_SET_ORDER;
68
use const STREAM_FILTER_WRITE;
69
70
/**
71
 * Export data in GEDCOM format
72
 */
73
class GedcomExportService
74
{
75
    private const ACCESS_LEVELS = [
76
        'gedadmin' => Auth::PRIV_NONE,
77
        'user'     => Auth::PRIV_USER,
78
        'visitor'  => Auth::PRIV_PRIVATE,
79
        'none'     => Auth::PRIV_HIDE,
80
    ];
81
82
    private ResponseFactoryInterface $response_factory;
83
84
    private StreamFactoryInterface $stream_factory;
85
86
    /**
87
     * @param ResponseFactoryInterface $response_factory
88
     * @param StreamFactoryInterface   $stream_factory
89
     */
90
    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
91
    {
92
        $this->response_factory = $response_factory;
93
        $this->stream_factory   = $stream_factory;
94
    }
95
96
    /**
97
     * @param Tree                        $tree           - Export data from this tree
98
     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
99
     * @param string                      $encoding       - Convert from UTF-8 to other encoding
100
     * @param string                      $privacy        - Filter records by role
101
     * @param string                      $filename       - Name of download file, without an extension
102
     * @param string                      $format         - One of: gedcom, zip, zipmedia, gedzip
103
     *
104
     * @return ResponseInterface
105
     */
106
    public function downloadResponse(
107
        Tree $tree,
108
        bool $sort_by_xref,
109
        string $encoding,
110
        string $privacy,
111
        string $line_endings,
112
        string $filename,
113
        string $format,
114
        Collection $records = null
115
    ): ResponseInterface {
116
        $access_level = self::ACCESS_LEVELS[$privacy];
117
118
        if ($format === 'gedcom') {
119
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
120
            $stream   = $this->stream_factory->createStreamFromResource($resource);
121
122
            return $this->response_factory->createResponse()
123
                ->withBody($stream)
124
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
125
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
126
        }
127
128
        // Create a new/empty .ZIP file
129
        $temp_zip_file  = stream_get_meta_data(tmpfile())['uri'];
130
        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
131
        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
132
        $zip_filesystem = new Filesystem($zip_adapter);
133
134
        if ($format === 'zipmedia') {
135
            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
136
        } elseif ($format === 'gedzip') {
137
            $media_path = '';
138
        } else {
139
            // Don't add media
140
            $media_path = null;
141
        }
142
143
        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);
144
145
        if ($format === 'gedzip') {
146
            $zip_filesystem->writeStream('gedcom.ged', $resource);
147
            $extension = '.gdz';
148
        } else {
149
            $zip_filesystem->writeStream($filename . '.ged', $resource);
150
            $extension = '.zip';
151
        }
152
153
        fclose($resource);
154
155
        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);
156
157
        return $this->response_factory->createResponse()
158
            ->withBody($stream)
159
            ->withHeader('content-type', 'application/zip')
160
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"')  . $extension . '"');
161
    }
162
163
    /**
164
     * Write GEDCOM data to a stream.
165
     *
166
     * @param Tree                        $tree           - Export data from this tree
167
     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
168
     * @param string                      $encoding       - Convert from UTF-8 to other encoding
169
     * @param int                         $access_level   - Apply privacy filtering
170
     * @param string                      $line_endings   - CRLF or LF
171
     * @param Collection<int,string>|null $records        - Just export these records
172
     * @param FilesystemOperator|null     $zip_filesystem - Write media files to this filesystem
173
     * @param string|null                 $media_path     - Location within the zip filesystem
174
     *
175
     * @return resource
176
     */
177
    public function export(
178
        Tree $tree,
179
        bool $sort_by_xref = false,
180
        string $encoding = UTF8::NAME,
181
        int $access_level = Auth::PRIV_HIDE,
182
        string $line_endings = 'CRLF',
183
        Collection $records = null,
184
        FilesystemOperator $zip_filesystem = null,
185
        string $media_path = null
186
    ) {
187
        $stream = fopen('php://memory', 'wb+');
188
189
        if ($stream === false) {
190
            throw new RuntimeException('Failed to create temporary stream');
191
        }
192
193
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);
194
195
        if ($records instanceof Collection) {
196
            // Export just these records - e.g. from clippings cart.
197
            $data = [
198
                new Collection([$this->createHeader($tree, $encoding, false)]),
0 ignored issues
show
Bug introduced by
array($this->createHeade...ree, $encoding, false)) of type array<integer,string> is incompatible with the type Illuminate\Contracts\Support\Arrayable expected by parameter $items of Illuminate\Support\Collection::__construct(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

198
                new Collection(/** @scrutinizer ignore-type */ [$this->createHeader($tree, $encoding, false)]),
Loading history...
199
                $records,
200
                new Collection(['0 TRLR']),
201
            ];
202
        } elseif ($access_level === Auth::PRIV_HIDE) {
203
            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
204
            $data = [
205
                new Collection([$this->createHeader($tree, $encoding, true)]),
206
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
207
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
208
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
209
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
210
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
211
                new Collection(['0 TRLR']),
212
            ];
213
        } else {
214
            // Disable the pending changes before creating GEDCOM records.
215
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
216
                return new Collection();
217
            });
218
219
            $data = [
220
                new Collection([$this->createHeader($tree, $encoding, true)]),
221
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
222
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
223
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
224
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
225
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
226
                new Collection(['0 TRLR']),
227
            ];
228
        }
229
230
        $media_filesystem = Registry::filesystem()->media($tree);
231
232
        foreach ($data as $rows) {
233
            foreach ($rows as $datum) {
234
                if (is_string($datum)) {
235
                    $gedcom = $datum;
236
                } elseif ($datum instanceof GedcomRecord) {
237
                    $gedcom = $datum->privatizeGedcom($access_level);
238
                } else {
239
                    $gedcom =
240
                        $datum->i_gedcom ??
241
                        $datum->f_gedcom ??
242
                        $datum->s_gedcom ??
243
                        $datum->m_gedcom ??
244
                        $datum->o_gedcom;
245
                }
246
247
                if ($media_path !== null && $zip_filesystem !== null && preg_match('/0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
248
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);
249
250
                    foreach ($matches as $match) {
251
                        $media_file = $match[1];
252
253
                        if ($media_filesystem->fileExists($media_file)) {
254
                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
255
                        }
256
                    }
257
                }
258
259
                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";
260
261
                if ($line_endings === 'CRLF') {
262
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
263
                }
264
265
                $bytes_written = fwrite($stream, $gedcom);
266
267
                if ($bytes_written !== strlen($gedcom)) {
268
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
269
                }
270
            }
271
        }
272
273
        if (rewind($stream) === false) {
274
            throw new RuntimeException('Cannot rewind temporary stream');
275
        }
276
277
        return $stream;
278
    }
279
280
    /**
281
     * Create a header record for a gedcom file.
282
     *
283
     * @param Tree   $tree
284
     * @param string $encoding
285
     * @param bool   $include_sub
286
     *
287
     * @return string
288
     */
289
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
290
    {
291
        // Force a ".ged" suffix
292
        $filename = $tree->name();
293
294
        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
295
            $filename .= '.ged';
296
        }
297
298
        $gedcom_encodings = [
299
            UTF16BE::NAME     => 'UNICODE',
300
            UTF16LE::NAME     => 'UNICODE',
301
            Windows1252::NAME => 'ANSI',
302
        ];
303
304
        $encoding = $gedcom_encodings[$encoding] ?? $encoding;
305
306
        // Build a new header record
307
        $gedcom = '0 HEAD';
308
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
309
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
310
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
311
        $gedcom .= "\n1 DEST DISKETTE";
312
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
313
        $gedcom .= "\n2 TIME " . date('H:i:s');
314
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
315
        $gedcom .= "\n1 CHAR " . $encoding;
316
        $gedcom .= "\n1 FILE " . $filename;
317
318
        // Preserve some values from the original header
319
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);
320
321
        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
322
            $gedcom .= "\n" . $fact->gedcom();
323
        }
324
325
        if ($include_sub) {
326
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
327
                $gedcom .= "\n" . $fact->gedcom();
328
            }
329
        }
330
331
        return $gedcom;
332
    }
333
334
    /**
335
     * Prepend a media path, such as might have been removed during import.
336
     *
337
     * @param string $gedcom
338
     * @param string $media_path
339
     *
340
     * @return string
341
     */
342
    private function convertMediaPath(string $gedcom, string $media_path): string
343
    {
344
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
345
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
346
                $filename = $match[1];
347
348
                // Don’t modify external links
349
                if (!str_contains($filename, '://')) {
350
                    $filename = $media_path . $filename;
351
                }
352
353
                return "\n1 FILE " . $filename;
354
            }, $gedcom);
355
        }
356
357
        return $gedcom;
358
    }
359
360
    /**
361
     * Wrap long lines using concatenation records.
362
     *
363
     * @param string $gedcom
364
     * @param int    $max_line_length
365
     *
366
     * @return string
367
     */
368
    public function wrapLongLines(string $gedcom, int $max_line_length): string
369
    {
370
        $lines = [];
371
372
        foreach (explode("\n", $gedcom) as $line) {
373
            // Split long lines
374
            // The total length of a GEDCOM line, including level number, cross-reference number,
375
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
376
            if (mb_strlen($line) > $max_line_length) {
377
                [$level, $tag] = explode(' ', $line, 3);
378
                if ($tag !== 'CONT') {
379
                    $level++;
380
                }
381
                do {
382
                    // Split after $pos chars
383
                    $pos = $max_line_length;
384
                    // Split on a non-space (standard gedcom behavior)
385
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
386
                        --$pos;
387
                    }
388
                    if ($pos === strpos($line, ' ', 3)) {
389
                        // No non-spaces in the data! Can’t split it :-(
390
                        break;
391
                    }
392
                    $lines[] = mb_substr($line, 0, $pos);
393
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
394
                } while (mb_strlen($line) > $max_line_length);
395
            }
396
            $lines[] = $line;
397
        }
398
399
        return implode("\n", $lines);
400
    }
401
402
    /**
403
     * @param Tree $tree
404
     * @param bool $sort_by_xref
405
     *
406
     * @return Builder
407
     */
408
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
409
    {
410
        $query = DB::table('families')
411
            ->where('f_file', '=', $tree->id())
412
            ->select(['f_gedcom', 'f_id']);
413
414
415
        if ($sort_by_xref) {
416
            $query
417
                ->orderBy(new Expression('LENGTH(f_id)'))
418
                ->orderBy('f_id');
419
        }
420
421
        return $query;
422
    }
423
424
    /**
425
     * @param Tree $tree
426
     * @param bool $sort_by_xref
427
     *
428
     * @return Builder
429
     */
430
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
431
    {
432
        $query = DB::table('individuals')
433
            ->where('i_file', '=', $tree->id())
434
            ->select(['i_gedcom', 'i_id']);
435
436
        if ($sort_by_xref) {
437
            $query
438
                ->orderBy(new Expression('LENGTH(i_id)'))
439
                ->orderBy('i_id');
440
        }
441
442
        return $query;
443
    }
444
445
    /**
446
     * @param Tree $tree
447
     * @param bool $sort_by_xref
448
     *
449
     * @return Builder
450
     */
451
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
452
    {
453
        $query = DB::table('sources')
454
            ->where('s_file', '=', $tree->id())
455
            ->select(['s_gedcom', 's_id']);
456
457
        if ($sort_by_xref) {
458
            $query
459
                ->orderBy(new Expression('LENGTH(s_id)'))
460
                ->orderBy('s_id');
461
        }
462
463
        return $query;
464
    }
465
466
    /**
467
     * @param Tree $tree
468
     * @param bool $sort_by_xref
469
     *
470
     * @return Builder
471
     */
472
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
473
    {
474
        $query = DB::table('media')
475
            ->where('m_file', '=', $tree->id())
476
            ->select(['m_gedcom', 'm_id']);
477
478
        if ($sort_by_xref) {
479
            $query
480
                ->orderBy(new Expression('LENGTH(m_id)'))
481
                ->orderBy('m_id');
482
        }
483
484
        return $query;
485
    }
486
487
    /**
488
     * @param Tree $tree
489
     * @param bool $sort_by_xref
490
     *
491
     * @return Builder
492
     */
493
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
494
    {
495
        $query = DB::table('other')
496
            ->where('o_file', '=', $tree->id())
497
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
498
            ->select(['o_gedcom', 'o_id']);
499
500
        if ($sort_by_xref) {
501
            $query
502
                ->orderBy('o_type')
503
                ->orderBy(new Expression('LENGTH(o_id)'))
504
                ->orderBy('o_id');
505
        }
506
507
        return $query;
508
    }
509
}
510