1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
17
|
|
|
|
18
|
|
|
declare(strict_types=1); |
19
|
|
|
|
20
|
|
|
namespace Fisharebest\Webtrees\Services; |
21
|
|
|
|
22
|
|
|
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use InvalidArgumentException;
use League\Flysystem\Filesystem;
use League\Flysystem\FilesystemOperator;
use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
use League\Flysystem\ZipArchive\ZipArchiveAdapter;
use Psr\Http\Message\ResponseFactoryInterface;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamFactoryInterface;
use RuntimeException;

use function addcslashes;
use function date;
use function explode;
use function fclose;
use function fopen;
use function fwrite;
use function implode;
use function is_string;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_match_all;
use function rewind;
use function stream_filter_append;
use function stream_get_meta_data;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function tmpfile;

use const PATHINFO_EXTENSION;
use const PREG_SET_ORDER;
use const STREAM_FILTER_WRITE;
69
|
|
|
|
70
|
|
|
/**
 * Export data in GEDCOM format.
 *
 * Builds a GEDCOM stream for a whole tree (or for a chosen set of records) and
 * can wrap that stream in an HTTP download response - either as a bare .ged
 * file or inside a ZIP archive, optionally together with the media files that
 * the exported media records reference.
 */
class GedcomExportService
{
    // Maps the privacy option names accepted by downloadResponse() to access levels.
    private const ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    /**
     * @param ResponseFactoryInterface $response_factory Creates the download response
     * @param StreamFactoryInterface   $stream_factory   Wraps the exported data as a response body
     */
    public function __construct(
        private ResponseFactoryInterface $response_factory,
        private StreamFactoryInterface $stream_factory
    ) {
    }

    /**
     * Export a tree (or a set of records) and return it as a file download.
     *
     * For the "gedcom" format the response body is the GEDCOM data itself.
     * For every other format the GEDCOM data is written into a temporary ZIP
     * archive: "gedzip" stores it as "gedcom.ged" with media at the archive
     * root and a ".gdz" extension; "zipmedia" adds media below the tree's
     * MEDIA_DIRECTORY preference; anything else (e.g. "zip") adds no media.
     *
     * @param Tree                                            $tree         Export data from this tree
     * @param bool                                            $sort_by_xref Write GEDCOM records in XREF order
     * @param string                                          $encoding     Convert from UTF-8 to other encoding
     * @param string                                          $privacy      Filter records by role (a key of ACCESS_LEVELS)
     * @param string                                          $line_endings CRLF or LF
     * @param string                                          $filename     Name of download file, without an extension
     * @param string                                          $format       One of: gedcom, zip, zipmedia, gedzip
     * @param Collection<int,string|object|GedcomRecord>|null $records      Just export these records
     *
     * @return ResponseInterface
     *
     * @throws InvalidArgumentException If $privacy is not a known option
     * @throws RuntimeException         If a temporary file or stream cannot be created or written
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        Collection|null $records = null
    ): ResponseInterface {
        // Reject unknown options up front, instead of failing later with an unrelated type error.
        $access_level = self::ACCESS_LEVELS[$privacy]
            ?? throw new InvalidArgumentException('Unknown privacy option: ' . $privacy);

        $quoted_filename = addcslashes($filename, '"');

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            // NOTE(review): the charset is always reported as UTF-8, even when $encoding
            // selects another encoding - confirm whether this is intentional.
            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . $quoted_filename . '.ged"');
        }

        // Create a new/empty .ZIP file.
        // We only want a unique temporary path: closing the handle removes the
        // file that tmpfile() created, and the archive is then created at that path.
        $temp_handle = tmpfile();

        if ($temp_handle === false) {
            throw new RuntimeException('Failed to create temporary file');
        }

        $temp_zip_file = stream_get_meta_data($temp_handle)['uri'];
        fclose($temp_handle);

        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
        $zip_filesystem = new Filesystem($zip_adapter);

        // Where (if anywhere) to put media files inside the archive. null = don't add media.
        $media_path = match ($format) {
            'zipmedia' => $tree->getPreference('MEDIA_DIRECTORY'),
            'gedzip'   => '',
            default    => null,
        };

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        if ($format === 'gedzip') {
            $gedcom_entry = 'gedcom.ged';
            $extension    = '.gdz';
        } else {
            $gedcom_entry = $filename . '.ged';
            $extension    = '.zip';
        }

        try {
            $zip_filesystem->writeStream($gedcom_entry, $resource);
        } finally {
            // Release the in-memory GEDCOM data even if writing to the archive fails.
            fclose($resource);
        }

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . $quoted_filename . $extension . '"');
    }

    /**
     * Write GEDCOM data to a stream.
     *
     * The data is written to an in-memory stream through a GedcomEncodingFilter
     * (UTF-8 to $encoding) and the stream is rewound before it is returned.
     * The caller owns the returned stream and is responsible for closing it.
     *
     * When both $zip_filesystem and $media_path are given, every file named by
     * a "1 FILE" line of an exported media (OBJE) record is copied from the
     * tree's media filesystem into the zip filesystem, if it exists.
     *
     * @param Tree                                            $tree           Export data from this tree
     * @param bool                                            $sort_by_xref   Write GEDCOM records in XREF order
     * @param string                                          $encoding       Convert from UTF-8 to other encoding
     * @param int                                             $access_level   Apply privacy filtering
     * @param string                                          $line_endings   CRLF or LF
     * @param Collection<int,string|object|GedcomRecord>|null $records        Just export these records
     * @param FilesystemOperator|null                         $zip_filesystem Write media files to this filesystem
     * @param string|null                                     $media_path     Location within the zip filesystem
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        Collection|null $records = null,
        FilesystemOperator|null $zip_filesystem = null,
        string|null $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // No privacy filtering at this level: stream the raw database rows
            // and write their GEDCOM text unchanged.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.
            // Disable the pending changes before creating GEDCOM records
            // (the cache entry for this tree is seeded with an empty collection).
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static fn (): Collection => new Collection());

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = $tree->mediaFilesystem();

        // Loop-invariant: whether to copy media files, and how to recognise a media record.
        $copy_media         = $media_path !== null && $zip_filesystem !== null;
        $media_record_regex = '/0 @' . Gedcom::REGEX_XREF . '@ OBJE/';

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Raw GEDCOM text, e.g. the header and trailer.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);

                    // Nothing is left of this record at this access level.
                    if ($gedcom === '') {
                        continue;
                    }
                } else {
                    // A database row: the GEDCOM column is named after the table it came from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($copy_media && preg_match($media_record_regex, $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if ($media_filesystem->fileExists($media_file)) {
                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream. Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Build a new GEDCOM header (HEAD) record for an export.
     *
     * The header is generated from scratch (source, date, GEDCOM version,
     * character set, filename). COPR, LANG, PLAC and NOTE facts are copied from
     * the tree's existing header; SUBM and SUBN facts are copied only when
     * $include_sub is true.
     *
     * @param Tree   $tree        The tree being exported
     * @param string $encoding    Encoding name; UTF-16 and Windows-1252 are written as UNICODE and ANSI
     * @param bool   $include_sub Whether to copy the submitter/submission facts
     *
     * @return string The header record, without a trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Some encodings go by a different name in the GEDCOM CHAR tag.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        // There should always be a header record.
        if ($header instanceof Header) {
            foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }

            if ($include_sub) {
                foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                    $gedcom .= "\n" . $fact->gedcom();
                }
            }
        }

        return $gedcom;
    }

    /**
     * Split over-long GEDCOM lines into a line followed by CONC continuation lines.
     *
     * Lengths are measured in characters, not bytes. A line is never split
     * immediately after a space: the split point moves left until the text
     * before it ends in a non-space, so the following CONC line carries the
     * spaces. If everything after the tag is whitespace, the line is left as is.
     *
     * Continuation lines are written one level below the line being split,
     * except when that line is itself a CONT or CONC line - these are siblings
     * of their continuations, so the level is kept.
     *
     * @param string $gedcom          One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length Maximum number of characters per line
     *
     * @return string The same data with long lines wrapped, separated by "\n"
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // CONT and CONC lines are already at continuation level.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // Compare character offsets with character offsets.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Query the GEDCOM text and XREF of every family in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('families', 'f', $tree, $sort_by_xref);
    }

    /**
     * Query the GEDCOM text and XREF of every individual in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('individuals', 'i', $tree, $sort_by_xref);
    }

    /**
     * Query the GEDCOM text and XREF of every source in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('sources', 's', $tree, $sort_by_xref);
    }

    /**
     * Query the GEDCOM text and XREF of every media object in a tree.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        return $this->recordQuery('media', 'm', $tree, $sort_by_xref);
    }

    /**
     * Query the GEDCOM text and XREF of every other record in a tree.
     *
     * Header and trailer records are excluded, as export() writes its own.
     * When sorting, rows are grouped by record type first.
     *
     * @param Tree $tree         Fetch records from this tree
     * @param bool $sort_by_xref Order the rows by record type, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }

    /**
     * Shared query for the record tables whose columns follow the
     * "<prefix>_file", "<prefix>_id", "<prefix>_gedcom" naming pattern.
     *
     * Sorting is by XREF length and then by XREF, so that shorter XREFs come
     * before longer ones (e.g. X2 before X10).
     *
     * @param string $table        Table name - only ever a literal supplied by this class
     * @param string $prefix       Column prefix - only ever a literal supplied by this class
     * @param Tree   $tree         Fetch records from this tree
     * @param bool   $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function recordQuery(string $table, string $prefix, Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table($table)
            ->where($prefix . '_file', '=', $tree->id())
            ->select([$prefix . '_gedcom', $prefix . '_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(' . $prefix . '_id)'))
                ->orderBy($prefix . '_id');
        }

        return $query;
    }
}
437
|
|
|
|