Passed
Push — main ( 1a9891...51cc45 )
by Greg
07:05
created

GedcomExportService::export()   C

Complexity

Conditions 11
Paths 67

Size

Total Lines 84
Code Lines 51

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 51
c 0
b 0
f 0
nc 67
nop 6
dl 0
loc 84
rs 6.9224

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments inside a method's body, this is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.

Commonly applied refactorings include Extract Method: moving a coherent fragment of a long method into its own, well-named method.

1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2021 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Services;
21
22
use Fisharebest\Webtrees\Auth;
23
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
24
use Fisharebest\Webtrees\Registry;
25
use Fisharebest\Webtrees\Gedcom;
26
use Fisharebest\Webtrees\GedcomRecord;
27
use Fisharebest\Webtrees\Header;
28
use Fisharebest\Webtrees\Tree;
29
use Fisharebest\Webtrees\Webtrees;
30
use Illuminate\Database\Capsule\Manager as DB;
31
use Illuminate\Database\Query\Builder;
32
use Illuminate\Database\Query\Expression;
33
use Illuminate\Support\Collection;
34
use RuntimeException;
35
36
use function date;
37
use function explode;
38
use function fopen;
39
use function fwrite;
40
use function mb_convert_encoding;
41
use function pathinfo;
42
use function rewind;
43
use function str_contains;
44
use function str_starts_with;
45
use function strlen;
46
use function strpos;
47
use function strtolower;
48
use function strtoupper;
49
use function utf8_decode;
50
51
use const PATHINFO_EXTENSION;
52
53
/**
54
 * Export data in GEDCOM format
55
 */
56
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The data is written to an in-memory stream ("php://memory"), which is
     * rewound before it is returned so that callers can read it from the start.
     *
     * @param Tree            $tree         - Export data from this tree
     * @param bool            $sort_by_xref - Write GEDCOM records in XREF order
     * @param string          $encoding     - Convert from UTF-8 to other encoding
     * @param int             $access_level - Apply privacy filtering
     * @param string          $media_path   - Prepend path to media filenames
     * @param Collection|null $records      - Just export these records (GEDCOM strings or GedcomRecord objects)
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written to, or rewound.
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        foreach ($this->exportData($tree, $sort_by_xref, $encoding, $access_level, $records) as $rows) {
            foreach ($rows as $datum) {
                $gedcom = $this->gedcomForDatum($datum, $access_level);

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                // Wrap first, then convert: line lengths are measured in characters, not bytes.
                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                $bytes_written = fwrite($stream, $gedcom);

                // fwrite() returns false on failure, which also fails this comparison.
                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Build the ordered list of record sets (header, records, trailer) to export.
     *
     * Each element is iterable and yields GEDCOM strings, GedcomRecord objects,
     * or raw database rows with one of the "*_gedcom" columns.
     *
     * @param Tree            $tree
     * @param bool            $sort_by_xref
     * @param string          $encoding
     * @param int             $access_level
     * @param Collection|null $records
     *
     * @return array<int,iterable>
     */
    private function exportData(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        int $access_level,
        ?Collection $records
    ): array {
        if ($records !== null) {
            // Export just these records - e.g. from clippings cart.
            return [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        }

        if ($access_level === Auth::PRIV_HIDE) {
            // No privacy filtering is applied in this branch: the raw database rows
            // are read with a cursor and their stored GEDCOM text is written as-is.
            return [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        }

        // We will be applying privacy filters, so we need the GEDCOM record objects.
        // Disable the pending changes before creating GEDCOM records.
        Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
            return new Collection();
        });

        return [
            new Collection([$this->createHeader($tree, $encoding, true)]),
            $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
            $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
            $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
            $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
            $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
            new Collection(['0 TRLR']),
        ];
    }

    /**
     * Extract the GEDCOM text from one exported item.
     *
     * @param string|GedcomRecord|object $datum        - GEDCOM text, a record object, or a raw database row
     * @param int                        $access_level - Passed to GedcomRecord::privatizeGedcom()
     *
     * @return string
     */
    private function gedcomForDatum($datum, int $access_level): string
    {
        if (is_string($datum)) {
            return $datum;
        }

        if ($datum instanceof GedcomRecord) {
            return $datum->privatizeGedcom($access_level);
        }

        // A raw database row: exactly one of these columns is expected to be set,
        // depending on which table the row was read from.
        return
            $datum->i_gedcom ??
            $datum->f_gedcom ??
            $datum->s_gedcom ??
            $datum->m_gedcom ??
            $datum->o_gedcom;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is built with "\n" line separators and no trailing newline.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy SUBM/SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header (or from an empty one if the tree has none)
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @...@ OBJE") are modified; any other
     * record is returned unchanged.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom) !== 1) {
            return $gedcom;
        }

        $converted = preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
            $filename = $match[1];

            // Don’t modify external links
            if (!str_contains($filename, '://')) {
                // Convert separators to match new path.
                if (str_contains($media_path, '\\')) {
                    $filename = strtr($filename, ['/' => '\\']);
                }

                if (!str_starts_with($filename, $media_path)) {
                    $filename = $media_path . $filename;
                }
            }

            return "\n1 FILE " . $filename;
        }, $gedcom);

        // preg_replace_callback() returns null on a PCRE error; keep the original text in that case.
        return $converted ?? $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM text to the requested export encoding.
     *
     * Any encoding other than ANSI, ANSEL or ASCII is returned unchanged.
     *
     * @param string $encoding
     * @param string $gedcom   - UTF-8 encoded text
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                // (utf8_decode() did the same conversion, but is deprecated since PHP 8.2.)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?  Not implemented: falls through to the ASCII conversion.
            case 'ASCII':
                // Might be needed by really old software?
                // Convert FROM UTF-8 TO ASCII, so that each unsupported character becomes one substitute character.
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * The input is split on "\n" and the resulting lines are joined with Gedcom::EOL.
     *
     * @param string $gedcom
     * @param int    $max_line_length - Maximum line length, in characters
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // Continuation lines sit one level below the line they continue. A CONT or CONC
                // line is already at that level, so its own continuation keeps the same level.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // The level/tag prefix is expected to be ASCII, so this byte offset
                    // can be compared with the character position.
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Sort a query by XREF: shorter XREFs first, then by value (so that e.g. "I2" precedes "I10").
     *
     * @param Builder $query
     * @param string  $xref_column - A hard-coded column name; never user input
     *
     * @return Builder
     */
    private function orderByXref(Builder $query, string $xref_column): Builder
    {
        return $query
            ->orderBy(new Expression('LENGTH(' . $xref_column . ')'))
            ->orderBy($xref_column);
    }

    /**
     * Query the family records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        return $sort_by_xref ? $this->orderByXref($query, 'f_id') : $query;
    }

    /**
     * Query the individual records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        return $sort_by_xref ? $this->orderByXref($query, 'i_id') : $query;
    }

    /**
     * Query the source records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        return $sort_by_xref ? $this->orderByXref($query, 's_id') : $query;
    }

    /**
     * Query the media records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        return $sort_by_xref ? $this->orderByXref($query, 'm_id') : $query;
    }

    /**
     * Query all other records of a tree, excluding the header and trailer
     * (which export() generates itself).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Sorts by record type first, then by XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query->orderBy('o_type');

            return $this->orderByXref($query, 'o_id');
        }

        return $query;
    }
}
409