@@ -28,14 +28,14 @@ discard block |
||
28 | 28 | $tree = Tree::findById($gedcom_id); |
29 | 29 | |
30 | 30 | if (!$tree || !Auth::isManager($tree, Auth::user())) { |
31 | - http_response_code(403); |
|
31 | + http_response_code(403); |
|
32 | 32 | |
33 | - return; |
|
33 | + return; |
|
34 | 34 | } |
35 | 35 | |
36 | 36 | $controller = new AjaxController; |
37 | 37 | $controller |
38 | - ->pageHeader(); |
|
38 | + ->pageHeader(); |
|
39 | 39 | |
40 | 40 | // Run in a transaction |
41 | 41 | Database::beginTransaction(); |
@@ -45,22 +45,22 @@ discard block |
||
45 | 45 | |
46 | 46 | // What is the current import status? |
47 | 47 | $row = Database::prepare( |
48 | - "SELECT" . |
|
49 | - " SUM(IF(imported, LENGTH(chunk_data), 0)) AS import_offset," . |
|
50 | - " SUM(LENGTH(chunk_data)) AS import_total" . |
|
51 | - " FROM `##gedcom_chunk` WHERE gedcom_id=?" |
|
48 | + "SELECT" . |
|
49 | + " SUM(IF(imported, LENGTH(chunk_data), 0)) AS import_offset," . |
|
50 | + " SUM(LENGTH(chunk_data)) AS import_total" . |
|
51 | + " FROM `##gedcom_chunk` WHERE gedcom_id=?" |
|
52 | 52 | )->execute(array($gedcom_id))->fetchOneRow(); |
53 | 53 | |
54 | 54 | if ($row->import_offset == $row->import_total) { |
55 | - Tree::findById($gedcom_id)->setPreference('imported', '1'); |
|
56 | - // Finished? Show the maintenance links, similar to admin_trees_manage.php |
|
57 | - Database::commit(); |
|
58 | - $controller->addInlineJavascript( |
|
59 | - 'jQuery("#import' . $gedcom_id . '").addClass("hidden");' . |
|
60 | - 'jQuery("#actions' . $gedcom_id . '").removeClass("hidden");' |
|
61 | - ); |
|
62 | - |
|
63 | - return; |
|
55 | + Tree::findById($gedcom_id)->setPreference('imported', '1'); |
|
56 | + // Finished? Show the maintenance links, similar to admin_trees_manage.php |
|
57 | + Database::commit(); |
|
58 | + $controller->addInlineJavascript( |
|
59 | + 'jQuery("#import' . $gedcom_id . '").addClass("hidden");' . |
|
60 | + 'jQuery("#actions' . $gedcom_id . '").removeClass("hidden");' |
|
61 | + ); |
|
62 | + |
|
63 | + return; |
|
64 | 64 | } |
65 | 65 | |
66 | 66 | // Calculate progress so far |
@@ -84,149 +84,149 @@ discard block |
||
84 | 84 | $first_time = ($row->import_offset == 0); |
85 | 85 | // Run for one second. This keeps the resource requirements low. |
86 | 86 | for ($end_time = microtime(true) + 1.0; microtime(true) < $end_time;) { |
87 | - $data = Database::prepare( |
|
88 | - "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
89 | - " FROM `##gedcom_chunk`" . |
|
90 | - " WHERE gedcom_id=? AND NOT imported" . |
|
91 | - " ORDER BY gedcom_chunk_id" . |
|
92 | - " LIMIT 1" |
|
93 | - )->execute(array($gedcom_id))->fetchOneRow(); |
|
94 | - // If we are loading the first (header) record, make sure the encoding is UTF-8. |
|
95 | - if ($first_time) { |
|
96 | - // Remove any byte-order-mark |
|
97 | - Database::prepare( |
|
98 | - "UPDATE `##gedcom_chunk`" . |
|
99 | - " SET chunk_data=TRIM(LEADING ? FROM chunk_data)" . |
|
100 | - " WHERE gedcom_chunk_id=?" |
|
101 | - )->execute(array(WT_UTF8_BOM, $data->gedcom_chunk_id)); |
|
102 | - // Re-fetch the data, now that we have removed the BOM |
|
103 | - $data = Database::prepare( |
|
104 | - "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
105 | - " FROM `##gedcom_chunk`" . |
|
106 | - " WHERE gedcom_chunk_id=?" |
|
107 | - )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
108 | - if (substr($data->chunk_data, 0, 6) != '0 HEAD') { |
|
109 | - Database::rollBack(); |
|
110 | - echo I18N::translate('Invalid GEDCOM file - no header record found.'); |
|
111 | - $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
112 | - |
|
113 | - return; |
|
114 | - } |
|
115 | - // What character set is this? Need to convert it to UTF8 |
|
116 | - if (preg_match('/[\r\n][ \t]*1 CHAR(?:ACTER)? (.+)/', $data->chunk_data, $match)) { |
|
117 | - $charset = trim(strtoupper($match[1])); |
|
118 | - } else { |
|
119 | - $charset = 'ASCII'; |
|
120 | - } |
|
121 | - // MySQL supports a wide range of collation conversions. These are ones that |
|
122 | - // have been encountered "in the wild". |
|
123 | - switch ($charset) { |
|
124 | - case 'ASCII': |
|
125 | - Database::prepare( |
|
126 | - "UPDATE `##gedcom_chunk`" . |
|
127 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING ascii) USING utf8)" . |
|
128 | - " WHERE gedcom_id=?" |
|
129 | - )->execute(array($gedcom_id)); |
|
130 | - break; |
|
131 | - case 'IBMPC': // IBMPC, IBM WINDOWS and MS-DOS could be anything. Mostly it means CP850. |
|
132 | - case 'IBM WINDOWS': |
|
133 | - case 'MS-DOS': |
|
134 | - case 'CP437': |
|
135 | - case 'CP850': |
|
136 | - // CP850 has extra letters with diacritics to replace box-drawing chars in CP437. |
|
137 | - Database::prepare( |
|
138 | - "UPDATE `##gedcom_chunk`" . |
|
139 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING cp850) USING utf8)" . |
|
140 | - " WHERE gedcom_id=?" |
|
141 | - )->execute(array($gedcom_id)); |
|
142 | - break; |
|
143 | - case 'ANSI': // ANSI could be anything. Most applications seem to treat it as latin1. |
|
144 | - $controller->addInlineJavascript( |
|
145 | - 'jQuery("#import' . $gedcom_id . '").parent().prepend("<div class=\"bg-info\">' . /* I18N: %1$s and %2$s are the names of character encodings, such as ISO-8859-1 or ASCII */ |
|
146 | - I18N::translate('This GEDCOM file is encoded using %1$s. Assume this to mean %2$s.', $charset, 'ISO-8859-1') . '</div>");' |
|
147 | - ); |
|
148 | - // no break; |
|
149 | - case 'WINDOWS': |
|
150 | - case 'CP1252': |
|
151 | - case 'ISO8859-1': |
|
152 | - case 'ISO-8859-1': |
|
153 | - case 'LATIN1': |
|
154 | - case 'LATIN-1': |
|
155 | - // Convert from ISO-8859-1 (western european) to UTF8. |
|
156 | - Database::prepare( |
|
157 | - "UPDATE `##gedcom_chunk`" . |
|
158 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin1) USING utf8)" . |
|
159 | - " WHERE gedcom_id=?" |
|
160 | - )->execute(array($gedcom_id)); |
|
161 | - break; |
|
162 | - case 'CP1250': |
|
163 | - case 'ISO8859-2': |
|
164 | - case 'ISO-8859-2': |
|
165 | - case 'LATIN2': |
|
166 | - case 'LATIN-2': |
|
167 | - // Convert from ISO-8859-2 (eastern european) to UTF8. |
|
168 | - Database::prepare( |
|
169 | - "UPDATE `##gedcom_chunk`" . |
|
170 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin2) USING utf8)" . |
|
171 | - " WHERE gedcom_id=?" |
|
172 | - )->execute(array($gedcom_id)); |
|
173 | - break; |
|
174 | - case 'MACINTOSH': |
|
175 | - // Convert from MAC Roman to UTF8. |
|
176 | - Database::prepare( |
|
177 | - "UPDATE `##gedcom_chunk`" . |
|
178 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING macroman) USING utf8)" . |
|
179 | - " WHERE gedcom_id=?" |
|
180 | - )->execute(array($gedcom_id)); |
|
181 | - break; |
|
182 | - case 'UTF8': |
|
183 | - case 'UTF-8': |
|
184 | - // Already UTF-8 so nothing to do! |
|
185 | - break; |
|
186 | - case 'ANSEL': |
|
187 | - default: |
|
188 | - Database::rollBack(); |
|
189 | - echo '<span class="error">', I18N::translate('Error: converting GEDCOM files from %s encoding to UTF-8 encoding not currently supported.', $charset), '</span>'; |
|
190 | - $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
191 | - |
|
192 | - return; |
|
193 | - } |
|
194 | - $first_time = false; |
|
195 | - |
|
196 | - // Re-fetch the data, now that we have performed character set conversion. |
|
197 | - $data = Database::prepare( |
|
198 | - "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
199 | - " FROM `##gedcom_chunk`" . |
|
200 | - " WHERE gedcom_chunk_id=?" |
|
201 | - )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
202 | - } |
|
203 | - |
|
204 | - if (!$data) { |
|
205 | - break; |
|
206 | - } |
|
207 | - try { |
|
208 | - // Import all the records in this chunk of data |
|
209 | - foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) { |
|
210 | - FunctionsImport::importRecord($rec, $tree, false); |
|
211 | - } |
|
212 | - // Mark the chunk as imported |
|
213 | - Database::prepare( |
|
214 | - "UPDATE `##gedcom_chunk` SET imported=TRUE WHERE gedcom_chunk_id=?" |
|
215 | - )->execute(array($data->gedcom_chunk_id)); |
|
216 | - } catch (PDOException $ex) { |
|
217 | - Database::rollBack(); |
|
218 | - if ($ex->getCode() === '40001') { |
|
219 | - // "SQLSTATE[40001]: Serialization failure: 1213 Deadlock found when trying to get lock; try restarting transaction" |
|
220 | - // The documentation says that if you get this error, wait and try again..... |
|
221 | - $controller->addInlineJavascript('jQuery("#import' . $gedcom_id . '").load("import.php?gedcom_id=' . $gedcom_id . '&u=' . uniqid() . '");'); |
|
222 | - } else { |
|
223 | - // A fatal error. Nothing we can do? |
|
224 | - echo '<span class="error">', $ex->getMessage(), '</span>'; |
|
225 | - $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
226 | - } |
|
227 | - |
|
228 | - return; |
|
229 | - } |
|
87 | + $data = Database::prepare( |
|
88 | + "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
89 | + " FROM `##gedcom_chunk`" . |
|
90 | + " WHERE gedcom_id=? AND NOT imported" . |
|
91 | + " ORDER BY gedcom_chunk_id" . |
|
92 | + " LIMIT 1" |
|
93 | + )->execute(array($gedcom_id))->fetchOneRow(); |
|
94 | + // If we are loading the first (header) record, make sure the encoding is UTF-8. |
|
95 | + if ($first_time) { |
|
96 | + // Remove any byte-order-mark |
|
97 | + Database::prepare( |
|
98 | + "UPDATE `##gedcom_chunk`" . |
|
99 | + " SET chunk_data=TRIM(LEADING ? FROM chunk_data)" . |
|
100 | + " WHERE gedcom_chunk_id=?" |
|
101 | + )->execute(array(WT_UTF8_BOM, $data->gedcom_chunk_id)); |
|
102 | + // Re-fetch the data, now that we have removed the BOM |
|
103 | + $data = Database::prepare( |
|
104 | + "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
105 | + " FROM `##gedcom_chunk`" . |
|
106 | + " WHERE gedcom_chunk_id=?" |
|
107 | + )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
108 | + if (substr($data->chunk_data, 0, 6) != '0 HEAD') { |
|
109 | + Database::rollBack(); |
|
110 | + echo I18N::translate('Invalid GEDCOM file - no header record found.'); |
|
111 | + $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
112 | + |
|
113 | + return; |
|
114 | + } |
|
115 | + // What character set is this? Need to convert it to UTF8 |
|
116 | + if (preg_match('/[\r\n][ \t]*1 CHAR(?:ACTER)? (.+)/', $data->chunk_data, $match)) { |
|
117 | + $charset = trim(strtoupper($match[1])); |
|
118 | + } else { |
|
119 | + $charset = 'ASCII'; |
|
120 | + } |
|
121 | + // MySQL supports a wide range of collation conversions. These are ones that |
|
122 | + // have been encountered "in the wild". |
|
123 | + switch ($charset) { |
|
124 | + case 'ASCII': |
|
125 | + Database::prepare( |
|
126 | + "UPDATE `##gedcom_chunk`" . |
|
127 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING ascii) USING utf8)" . |
|
128 | + " WHERE gedcom_id=?" |
|
129 | + )->execute(array($gedcom_id)); |
|
130 | + break; |
|
131 | + case 'IBMPC': // IBMPC, IBM WINDOWS and MS-DOS could be anything. Mostly it means CP850. |
|
132 | + case 'IBM WINDOWS': |
|
133 | + case 'MS-DOS': |
|
134 | + case 'CP437': |
|
135 | + case 'CP850': |
|
136 | + // CP850 has extra letters with diacritics to replace box-drawing chars in CP437. |
|
137 | + Database::prepare( |
|
138 | + "UPDATE `##gedcom_chunk`" . |
|
139 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING cp850) USING utf8)" . |
|
140 | + " WHERE gedcom_id=?" |
|
141 | + )->execute(array($gedcom_id)); |
|
142 | + break; |
|
143 | + case 'ANSI': // ANSI could be anything. Most applications seem to treat it as latin1. |
|
144 | + $controller->addInlineJavascript( |
|
145 | + 'jQuery("#import' . $gedcom_id . '").parent().prepend("<div class=\"bg-info\">' . /* I18N: %1$s and %2$s are the names of character encodings, such as ISO-8859-1 or ASCII */ |
|
146 | + I18N::translate('This GEDCOM file is encoded using %1$s. Assume this to mean %2$s.', $charset, 'ISO-8859-1') . '</div>");' |
|
147 | + ); |
|
148 | + // no break; |
|
149 | + case 'WINDOWS': |
|
150 | + case 'CP1252': |
|
151 | + case 'ISO8859-1': |
|
152 | + case 'ISO-8859-1': |
|
153 | + case 'LATIN1': |
|
154 | + case 'LATIN-1': |
|
155 | + // Convert from ISO-8859-1 (western european) to UTF8. |
|
156 | + Database::prepare( |
|
157 | + "UPDATE `##gedcom_chunk`" . |
|
158 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin1) USING utf8)" . |
|
159 | + " WHERE gedcom_id=?" |
|
160 | + )->execute(array($gedcom_id)); |
|
161 | + break; |
|
162 | + case 'CP1250': |
|
163 | + case 'ISO8859-2': |
|
164 | + case 'ISO-8859-2': |
|
165 | + case 'LATIN2': |
|
166 | + case 'LATIN-2': |
|
167 | + // Convert from ISO-8859-2 (eastern european) to UTF8. |
|
168 | + Database::prepare( |
|
169 | + "UPDATE `##gedcom_chunk`" . |
|
170 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin2) USING utf8)" . |
|
171 | + " WHERE gedcom_id=?" |
|
172 | + )->execute(array($gedcom_id)); |
|
173 | + break; |
|
174 | + case 'MACINTOSH': |
|
175 | + // Convert from MAC Roman to UTF8. |
|
176 | + Database::prepare( |
|
177 | + "UPDATE `##gedcom_chunk`" . |
|
178 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING macroman) USING utf8)" . |
|
179 | + " WHERE gedcom_id=?" |
|
180 | + )->execute(array($gedcom_id)); |
|
181 | + break; |
|
182 | + case 'UTF8': |
|
183 | + case 'UTF-8': |
|
184 | + // Already UTF-8 so nothing to do! |
|
185 | + break; |
|
186 | + case 'ANSEL': |
|
187 | + default: |
|
188 | + Database::rollBack(); |
|
189 | + echo '<span class="error">', I18N::translate('Error: converting GEDCOM files from %s encoding to UTF-8 encoding not currently supported.', $charset), '</span>'; |
|
190 | + $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
191 | + |
|
192 | + return; |
|
193 | + } |
|
194 | + $first_time = false; |
|
195 | + |
|
196 | + // Re-fetch the data, now that we have performed character set conversion. |
|
197 | + $data = Database::prepare( |
|
198 | + "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
199 | + " FROM `##gedcom_chunk`" . |
|
200 | + " WHERE gedcom_chunk_id=?" |
|
201 | + )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
202 | + } |
|
203 | + |
|
204 | + if (!$data) { |
|
205 | + break; |
|
206 | + } |
|
207 | + try { |
|
208 | + // Import all the records in this chunk of data |
|
209 | + foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) { |
|
210 | + FunctionsImport::importRecord($rec, $tree, false); |
|
211 | + } |
|
212 | + // Mark the chunk as imported |
|
213 | + Database::prepare( |
|
214 | + "UPDATE `##gedcom_chunk` SET imported=TRUE WHERE gedcom_chunk_id=?" |
|
215 | + )->execute(array($data->gedcom_chunk_id)); |
|
216 | + } catch (PDOException $ex) { |
|
217 | + Database::rollBack(); |
|
218 | + if ($ex->getCode() === '40001') { |
|
219 | + // "SQLSTATE[40001]: Serialization failure: 1213 Deadlock found when trying to get lock; try restarting transaction" |
|
220 | + // The documentation says that if you get this error, wait and try again..... |
|
221 | + $controller->addInlineJavascript('jQuery("#import' . $gedcom_id . '").load("import.php?gedcom_id=' . $gedcom_id . '&u=' . uniqid() . '");'); |
|
222 | + } else { |
|
223 | + // A fatal error. Nothing we can do? |
|
224 | + echo '<span class="error">', $ex->getMessage(), '</span>'; |
|
225 | + $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
226 | + } |
|
227 | + |
|
228 | + return; |
|
229 | + } |
|
230 | 230 | } |
231 | 231 | |
232 | 232 | Database::commit(); |