Passed
Push — main ( be0746...771780 )
by Greg
09:16 queued 03:06
created

GedcomExportService::export()   C

Complexity

Conditions 16
Paths 121

Size

Total Lines 101
Code Lines 58

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 16
eloc 58
nc 121
nop 8
dl 0
loc 101
rs 5.3916
c 0
b 0
f 0

How to fix   Long Method    Complexity    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method (move a cohesive part of the body into its own, well-named method).

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

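There are several approaches to avoid long parameter lists: Introduce Parameter Object (group related parameters into one object), Preserve Whole Object (pass the object instead of several values taken from it), and Replace Parameter with Method Call (let the method obtain the value itself).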

1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2022 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Services;
21
22
use Fisharebest\Webtrees\Auth;
23
use Fisharebest\Webtrees\Encodings\UTF16BE;
24
use Fisharebest\Webtrees\Encodings\UTF16LE;
25
use Fisharebest\Webtrees\Encodings\UTF8;
26
use Fisharebest\Webtrees\Encodings\Windows1252;
27
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
28
use Fisharebest\Webtrees\Gedcom;
29
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
30
use Fisharebest\Webtrees\GedcomRecord;
31
use Fisharebest\Webtrees\Header;
32
use Fisharebest\Webtrees\Registry;
33
use Fisharebest\Webtrees\Tree;
34
use Fisharebest\Webtrees\Webtrees;
35
use Illuminate\Database\Capsule\Manager as DB;
36
use Illuminate\Database\Query\Builder;
37
use Illuminate\Database\Query\Expression;
38
use Illuminate\Support\Collection;
39
use League\Flysystem\Filesystem;
40
use League\Flysystem\FilesystemOperator;
41
use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
42
use League\Flysystem\ZipArchive\ZipArchiveAdapter;
43
use Psr\Http\Message\ResponseFactoryInterface;
44
use Psr\Http\Message\ResponseInterface;
45
use Psr\Http\Message\StreamFactoryInterface;
46
use RuntimeException;
47
48
use function addcslashes;
49
use function date;
50
use function explode;
51
use function fclose;
52
use function fopen;
53
use function fwrite;
54
use function is_string;
55
use function pathinfo;
56
use function preg_match_all;
57
use function rewind;
58
use function stream_filter_append;
59
use function stream_get_meta_data;
60
use function strlen;
61
use function strpos;
62
use function strtolower;
63
use function strtoupper;
64
use function tmpfile;
65
66
use const PATHINFO_EXTENSION;
67
use const PREG_SET_ORDER;
68
use const STREAM_FILTER_WRITE;
69
70
/**
71
 * Export data in GEDCOM format
72
 */
73
class GedcomExportService
74
{
75
    /**
     * Privacy role names accepted by downloadResponse(), mapped to the
     * access level that is handed on to export().
     */
    private const ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    /** Creates the HTTP responses returned by downloadResponse(). */
    private ResponseFactoryInterface $response_factory;

    /** Turns the exported resource or zip file into a response body stream. */
    private StreamFactoryInterface $stream_factory;

    /**
     * Store the PSR-17 factories that are needed to build download responses.
     *
     * @param ResponseFactoryInterface $response_factory
     * @param StreamFactoryInterface   $stream_factory
     */
    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
    {
        $this->stream_factory   = $stream_factory;
        $this->response_factory = $response_factory;
    }
95
96
    /**
     * Build an HTTP download response containing a GEDCOM export.
     *
     * For the "gedcom" format, the response body is the GEDCOM data itself.
     * For every other format, the GEDCOM data is written into a temporary zip
     * archive, and that archive becomes the response body:
     *  - "zipmedia" also adds media files below the tree's MEDIA_DIRECTORY path,
     *  - "gedzip" adds media files at the archive root, names the GEDCOM file
     *    "gedcom.ged" and uses a ".gdz" extension,
     *  - anything else (e.g. "zip") contains only the GEDCOM file.
     *
     * @param Tree            $tree         - Export data from this tree
     * @param bool            $sort_by_xref - Write GEDCOM records in XREF order
     * @param string          $encoding     - Convert from UTF-8 to other encoding
     * @param string          $privacy      - Filter records by role (a key of self::ACCESS_LEVELS)
     * @param string          $line_endings - CRLF or LF
     * @param string          $filename     - Name of download file, without an extension
     * @param string          $format       - One of: gedcom, zip, zipmedia, gedzip
     * @param Collection|null $records      - Just export these records
     *
     * @return ResponseInterface
     *
     * @throws RuntimeException If the temporary file or stream cannot be created or written
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        ?Collection $records = null
    ): ResponseInterface {
        $access_level = self::ACCESS_LEVELS[$privacy];

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            // NOTE(review): the charset is always announced as UTF-8, even when
            // $encoding asks for a different encoding - confirm this is intended.
            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Create a new/empty .ZIP file.
        // tmpfile() is only used to obtain a unique path.  Closing the handle
        // removes the temporary file again, so that the zip provider starts
        // from a path that does not exist yet.
        $temp_file = tmpfile();

        if ($temp_file === false) {
            throw new RuntimeException('Failed to create temporary file');
        }

        $temp_zip_file = stream_get_meta_data($temp_file)['uri'];
        fclose($temp_file);

        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
        $zip_filesystem = new Filesystem($zip_adapter);

        if ($format === 'zipmedia') {
            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
        } elseif ($format === 'gedzip') {
            $media_path = '';
        } else {
            // Don't add media
            $media_path = null;
        }

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        if ($format === 'gedzip') {
            $gedcom_name = 'gedcom.ged';
            $extension   = '.gdz';
        } else {
            $gedcom_name = $filename . '.ged';
            $extension   = '.zip';
        }

        try {
            $zip_filesystem->writeStream($gedcom_name, $resource);
        } finally {
            // Release the exported data even when writing to the archive fails.
            fclose($resource);
        }

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }
164
165
    /**
     * Write GEDCOM data to a stream.
     *
     * The records are written to an in-memory stream through a
     * GedcomEncodingFilter that converts from UTF-8 to $encoding.  Long lines
     * are wrapped and line endings are converted on the way.  When both a zip
     * filesystem and a media path are given, the files referenced by media
     * records are copied into the zip filesystem as well.
     *
     * The returned stream is rewound, and the caller is responsible for closing it.
     *
     * @param Tree                        $tree           - Export data from this tree
     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
     * @param string                      $encoding       - Convert from UTF-8 to other encoding
     * @param int                         $access_level   - Apply privacy filtering
     * @param string                      $line_endings   - CRLF or LF
     * @param Collection<int,string>|null $records        - Just export these records
     * @param FilesystemOperator|null     $zip_filesystem - Write media files to this filesystem
     * @param string|null                 $media_path     - Location within the zip filesystem
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        ?Collection $records = null,
        ?FilesystemOperator $zip_filesystem = null,
        ?string $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        $data             = $this->recordSources($tree, $sort_by_xref, $encoding, $access_level, $records);
        $media_filesystem = $tree->mediaFilesystem();

        // Media files are only copied when we know both where to and under which path.
        $copy_media   = $media_path !== null && $zip_filesystem !== null;
        $media_record = '/0 @' . Gedcom::REGEX_XREF . '@ OBJE/';

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                $gedcom = $this->gedcomFromDatum($datum, $access_level);

                if ($copy_media && preg_match($media_record, $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if ($media_filesystem->fileExists($media_file)) {
                            $media_stream = $media_filesystem->readStream($media_file);

                            try {
                                $zip_filesystem->writeStream($media_path . $media_file, $media_stream);
                            } finally {
                                // One handle per media file would otherwise stay open
                                // until the end of the request.
                                if (is_resource($media_stream)) {
                                    fclose($media_stream);
                                }
                            }
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Choose the record sources for an export: a header, the records and a trailer.
     *
     * @param Tree                        $tree
     * @param bool                        $sort_by_xref
     * @param string                      $encoding
     * @param int                         $access_level
     * @param Collection<int,string>|null $records
     *
     * @return array<int,iterable> Each element yields GEDCOM strings, GedcomRecord objects or database rows
     */
    private function recordSources(Tree $tree, bool $sort_by_xref, string $encoding, int $access_level, ?Collection $records): array
    {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            return [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        }

        if ($access_level === Auth::PRIV_HIDE) {
            // Rows from these cursors are written as stored, without privacy
            // filtering, so there is no need to create GEDCOM record objects.
            return [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        }

        // We will be applying privacy filters, so we need the GEDCOM record objects.
        // Disable the pending changes before creating GEDCOM records.
        Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
            return new Collection();
        });

        return [
            new Collection([$this->createHeader($tree, $encoding, true)]),
            $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
            $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
            $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
            $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
            $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
            new Collection(['0 TRLR']),
        ];
    }

    /**
     * Convert one item from a record source into GEDCOM text.
     *
     * @param string|GedcomRecord|object $datum        - GEDCOM text, a record object or a database row
     * @param int                        $access_level - Used to privatize record objects
     *
     * @return string
     */
    private function gedcomFromDatum($datum, int $access_level): string
    {
        if (is_string($datum)) {
            return $datum;
        }

        if ($datum instanceof GedcomRecord) {
            return $datum->privatizeGedcom($access_level);
        }

        // A database row - only the column of the table it came from is set.
        return $datum->i_gedcom ??
            $datum->f_gedcom ??
            $datum->s_gedcom ??
            $datum->m_gedcom ??
            $datum->o_gedcom;
    }
281
282
    /**
     * Create a header record for a gedcom file.
     *
     * The header is built from scratch.  The COPR, LANG, PLAC and NOTE facts
     * of the tree's existing header are appended to it, followed by the SUBM
     * and SUBN facts when $include_sub is set.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written to the CHAR line, using the GEDCOM name where one is mapped
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename  = $tree->name();
        $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));

        if ($extension !== 'ged') {
            $filename .= '.ged';
        }

        // Encodings that have a different name in a GEDCOM header.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $charset = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $lines = [
            '0 HEAD',
            '1 SOUR ' . Webtrees::NAME,
            '2 NAME ' . Webtrees::NAME,
            '2 VERS ' . Webtrees::VERSION,
            '1 DEST DISKETTE',
            '1 DATE ' . strtoupper(date('d M Y')),
            '2 TIME ' . date('H:i:s'),
            "1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED",
            '1 CHAR ' . $charset,
            '1 FILE ' . $filename,
        ];

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree);

        if ($header === null) {
            $header = Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);
        }

        // There should always be a header record.
        if ($header instanceof Header) {
            $tag_groups = [['COPR', 'LANG', 'PLAC', 'NOTE']];

            if ($include_sub) {
                $tag_groups[] = ['SUBM', 'SUBN'];
            }

            foreach ($tag_groups as $tags) {
                foreach ($header->facts($tags) as $fact) {
                    $lines[] = $fact->gedcom();
                }
            }
        }

        return implode("\n", $lines);
    }
338
339
    /**
     * Wrap long lines using concatenation records.
     *
     * Every line longer than $max_line_length characters is cut into pieces of
     * at most that length.  The first piece keeps the original level and tag;
     * the remaining text follows on CONC lines.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum line length, in characters
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // CONC lines are children of the line that is being split.  A
                // CONT or CONC line is itself such a child, so its continuation
                // is a sibling at the same level rather than one level deeper.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // strpos() counts bytes while $pos counts characters; the two
                    // agree here as long as the level and tag are plain ASCII.
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }
380
381
    /**
     * Query the GEDCOM data and XREF of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Shorter XREFs first, then alphabetically (e.g. X2 before X10)
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $families = DB::table('families')
            ->select(['f_gedcom', 'f_id'])
            ->where('f_file', '=', $tree->id());

        if (!$sort_by_xref) {
            return $families;
        }

        return $families
            ->orderBy(new Expression('LENGTH(f_id)'))
            ->orderBy('f_id');
    }
402
403
    /**
     * Query the GEDCOM data and XREF of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Shorter XREFs first, then alphabetically (e.g. X2 before X10)
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $individuals = DB::table('individuals')
            ->select(['i_gedcom', 'i_id'])
            ->where('i_file', '=', $tree->id());

        if (!$sort_by_xref) {
            return $individuals;
        }

        return $individuals
            ->orderBy(new Expression('LENGTH(i_id)'))
            ->orderBy('i_id');
    }
423
424
    /**
     * Query the GEDCOM data and XREF of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Shorter XREFs first, then alphabetically (e.g. X2 before X10)
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $sources = DB::table('sources')
            ->select(['s_gedcom', 's_id'])
            ->where('s_file', '=', $tree->id());

        if (!$sort_by_xref) {
            return $sources;
        }

        return $sources
            ->orderBy(new Expression('LENGTH(s_id)'))
            ->orderBy('s_id');
    }
444
445
    /**
     * Query the GEDCOM data and XREF of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Shorter XREFs first, then alphabetically (e.g. X2 before X10)
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $media = DB::table('media')
            ->select(['m_gedcom', 'm_id'])
            ->where('m_file', '=', $tree->id());

        if (!$sort_by_xref) {
            return $media;
        }

        return $media
            ->orderBy(new Expression('LENGTH(m_id)'))
            ->orderBy('m_id');
    }
465
466
    /**
     * Query the GEDCOM data and XREF of every record in the "other" table of
     * a tree, except for header and trailer records.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Group by record type, then shorter XREFs first, then alphabetically
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $others = DB::table('other')
            ->select(['o_gedcom', 'o_id'])
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR']);

        if (!$sort_by_xref) {
            return $others;
        }

        return $others
            ->orderBy('o_type')
            ->orderBy(new Expression('LENGTH(o_id)'))
            ->orderBy('o_id');
    }
488
}
489