Completed
Push — master ( 1e9c29...6fd018 )
by Greg
14:03 queued 07:52
created

GedcomLoad   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 194
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 120
dl 0
loc 194
rs 9.68
c 0
b 0
f 0
wmc 34

2 Methods

Rating   Name   Duplication   Size   Complexity  
F handle() 0 172 33
A __construct() 0 3 1
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2020 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Http\RequestHandlers;
21
22
use Exception;
23
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
24
use Fisharebest\Webtrees\Functions\FunctionsImport;
25
use Fisharebest\Webtrees\Gedcom;
26
use Fisharebest\Webtrees\Http\ViewResponseTrait;
27
use Fisharebest\Webtrees\I18N;
28
use Fisharebest\Webtrees\Services\TimeoutService;
29
use Fisharebest\Webtrees\Tree;
30
use Illuminate\Database\Capsule\Manager as DB;
31
use Illuminate\Database\Query\Expression;
32
use Psr\Http\Message\ResponseInterface;
33
use Psr\Http\Message\ServerRequestInterface;
34
use Psr\Http\Server\RequestHandlerInterface;
35
36
use function assert;
37
use function preg_match;
38
use function preg_split;
39
use function response;
40
use function str_replace;
41
use function str_starts_with;
42
use function strlen;
43
use function strtoupper;
44
use function substr;
45
use function trim;
46
use function view;
47
48
/**
 * Load a chunk of GEDCOM data.
 *
 * Each request imports pending rows of the gedcom_chunk table for one tree,
 * stopping when the timeout service reports that the time limit is up, and
 * then renders a progress, completion or failure view.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;

    // GEDCOM character set name (upper case) => MySQL character set used to decode it.
    // MySQL supports a wide range of collation conversions. These are ones that
    // have been encountered "in the wild".
    private const MYSQL_CHARSETS = [
        'ASCII'       => 'ascii',
        // IBMPC, IBM WINDOWS and MS-DOS could be anything. Mostly it means CP850.
        // CP850 has extra letters with diacritics to replace box-drawing chars in CP437.
        'IBMPC'       => 'cp850',
        'IBM WINDOWS' => 'cp850',
        'MS-DOS'      => 'cp850',
        'CP437'       => 'cp850',
        'CP850'       => 'cp850',
        // ANSI could be anything. Most applications seem to treat it as latin1.
        'ANSI'        => 'latin1',
        'WINDOWS'     => 'latin1',
        'CP1252'      => 'latin1',
        'ISO8859-1'   => 'latin1',
        'ISO-8859-1'  => 'latin1',
        'LATIN1'      => 'latin1',
        'LATIN-1'     => 'latin1',
        // ISO-8859-2 (eastern european).
        'CP1250'      => 'latin2',
        'ISO8859-2'   => 'latin2',
        'ISO-8859-2'  => 'latin2',
        'LATIN2'      => 'latin2',
        'LATIN-2'     => 'latin2',
        // MAC Roman.
        'MACINTOSH'   => 'macroman',
    ];

    /** @var TimeoutService */
    private $timeout_service;

    /**
     * GedcomLoad constructor.
     *
     * @param TimeoutService $timeout_service
     */
    public function __construct(TimeoutService $timeout_service)
    {
        $this->timeout_service = $timeout_service;
    }

    /**
     * Import pending chunks for the tree given in the request's "tree" attribute.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        try {
            // Only allow one process to import each gedcom at a time
            DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->lockForUpdate()
                ->get();

            // What is the current import status?
            $import_offset = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $import_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Finished? (This also covers a tree with no chunks, so the division below is safe.)
            if ($import_offset === $import_total) {
                $tree->setPreference('imported', '1');

                $html = view('admin/import-complete', ['tree' => $tree]);

                return response($html);
            }

            // Calculate progress so far. This is the state at the start of the request.
            $progress = $import_offset / $import_total;

            // Nothing imported yet means the next chunk holds the header record.
            $first_time = ($import_offset === 0);

            // Collect up any errors, and show them later.
            $errors = '';

            // Run for a short period of time. This keeps the resource requirements low.
            do {
                $data = $this->fetchNextChunk($tree);

                // If we are loading the first (header) record, make sure the encoding is UTF-8.
                if ($data !== null && $first_time) {
                    $error = $this->prepareHeaderChunk($tree, $data);

                    if ($error !== null) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => $error,
                            'tree'  => $tree,
                        ]);
                    }

                    $first_time = false;

                    // Re-fetch the data, now that we have performed character set conversion.
                    $data = DB::table('gedcom_chunk')
                        ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                        ->select(['gedcom_chunk_id', 'chunk_data'])
                        ->first();
                }

                // No chunks left to import.
                if ($data === null) {
                    break;
                }

                $errors .= $this->importChunk($tree, $data);
            } while (!$this->timeout_service->isTimeLimitUp());

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            // NOTE(review): no transaction is started in this class; presumably one
            // is opened by the caller before this handler runs - confirm.
            DB::connection()->rollBack();

            return $this->viewResponse('admin/import-fail', [
                'error' => $ex->getMessage(),
                'tree'  => $tree,
            ]);
        }
    }

    /**
     * Fetch the un-imported chunk with the lowest gedcom_chunk_id for a tree.
     *
     * @param Tree $tree
     *
     * @return object|null A row with gedcom_chunk_id and chunk_data, or null if none remain
     */
    private function fetchNextChunk(Tree $tree)
    {
        return DB::table('gedcom_chunk')
            ->where('gedcom_id', '=', $tree->id())
            ->where('imported', '=', '0')
            ->orderBy('gedcom_chunk_id')
            ->select(['gedcom_chunk_id', 'chunk_data'])
            ->first();
    }

    /**
     * Validate the header chunk and convert all of the tree's chunks to UTF-8.
     *
     * Strips a UTF-8 byte-order-mark (persisting the stripped data), checks that the
     * chunk starts with a header record, reads the declared character set and, where
     * a conversion is needed, rewrites chunk_data for every chunk of the tree.
     *
     * @param Tree   $tree
     * @param object $data The header row (gedcom_chunk_id, chunk_data); chunk_data may be modified
     *
     * @return string|null A translated error message, or null on success
     */
    private function prepareHeaderChunk(Tree $tree, $data): ?string
    {
        // Remove any byte-order-mark
        if (str_starts_with($data->chunk_data, Gedcom::UTF8_BOM)) {
            $data->chunk_data = substr($data->chunk_data, strlen(Gedcom::UTF8_BOM));
            // Put it back in the database, so we can do character conversion
            DB::table('gedcom_chunk')
                ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                ->update(['chunk_data' => $data->chunk_data]);
        }

        if (!str_starts_with($data->chunk_data, '0 HEAD')) {
            return I18N::translate('Invalid GEDCOM file - no header record found.');
        }

        // What character set is this? Need to convert it to UTF8
        if (preg_match('/[\r\n][ \t]*1 CHAR(?:ACTER)? (.+)/', $data->chunk_data, $match)) {
            $charset = strtoupper(trim($match[1]));
        } else {
            $charset = 'ASCII';
        }

        // Already UTF-8 so nothing to do!
        if ($charset === 'UTF8' || $charset === 'UTF-8') {
            return null;
        }

        $mysql_charset = self::MYSQL_CHARSETS[$charset] ?? null;

        // ANSEL, and any other character set that is not in the table.
        if ($mysql_charset === null) {
            return I18N::translate('Error: converting GEDCOM files from %s encoding to UTF-8 encoding not currently supported.', $charset);
        }

        // $mysql_charset comes only from the constant table above, never from the
        // GEDCOM file, so it is safe to place in a raw SQL expression.
        DB::table('gedcom_chunk')
            ->where('gedcom_id', '=', $tree->id())
            ->update(['chunk_data' => new Expression('CONVERT(CONVERT(chunk_data USING ' . $mysql_charset . ') USING utf8)')]);

        return null;
    }

    /**
     * Import every record in one chunk, then flag the chunk as imported.
     *
     * @param Tree   $tree
     * @param object $data A row with gedcom_chunk_id and chunk_data
     *
     * @return string The concatenated messages of any GEDCOM errors that were caught
     */
    private function importChunk(Tree $tree, $data): string
    {
        $errors = '';

        // Treat carriage returns as line breaks.
        $data->chunk_data = str_replace("\r", "\n", $data->chunk_data);

        // Import all the records in this chunk of data.
        // Records are split at line breaks that are followed by a "0".
        foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) {
            try {
                FunctionsImport::importRecord($rec, $tree, false);
            } catch (GedcomErrorException $exception) {
                $errors .= $exception->getMessage();
            }
        }

        // Mark the chunk as imported
        DB::table('gedcom_chunk')
            ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
            ->update(['imported' => 1]);

        return $errors;
    }
}
249