@@ -28,14 +28,14 @@ discard block |
||
| 28 | 28 | $tree = Tree::findById($gedcom_id); |
| 29 | 29 | |
| 30 | 30 | if (!$tree || !Auth::isManager($tree, Auth::user())) { |
| 31 | - http_response_code(403); |
|
| 31 | + http_response_code(403); |
|
| 32 | 32 | |
| 33 | - return; |
|
| 33 | + return; |
|
| 34 | 34 | } |
| 35 | 35 | |
| 36 | 36 | $controller = new AjaxController; |
| 37 | 37 | $controller |
| 38 | - ->pageHeader(); |
|
| 38 | + ->pageHeader(); |
|
| 39 | 39 | |
| 40 | 40 | // Run in a transaction |
| 41 | 41 | Database::beginTransaction(); |
@@ -45,22 +45,22 @@ discard block |
||
| 45 | 45 | |
| 46 | 46 | // What is the current import status? |
| 47 | 47 | $row = Database::prepare( |
| 48 | - "SELECT" . |
|
| 49 | - " SUM(IF(imported, LENGTH(chunk_data), 0)) AS import_offset," . |
|
| 50 | - " SUM(LENGTH(chunk_data)) AS import_total" . |
|
| 51 | - " FROM `##gedcom_chunk` WHERE gedcom_id=?" |
|
| 48 | + "SELECT" . |
|
| 49 | + " SUM(IF(imported, LENGTH(chunk_data), 0)) AS import_offset," . |
|
| 50 | + " SUM(LENGTH(chunk_data)) AS import_total" . |
|
| 51 | + " FROM `##gedcom_chunk` WHERE gedcom_id=?" |
|
| 52 | 52 | )->execute(array($gedcom_id))->fetchOneRow(); |
| 53 | 53 | |
| 54 | 54 | if ($row->import_offset == $row->import_total) { |
| 55 | - Tree::findById($gedcom_id)->setPreference('imported', '1'); |
|
| 56 | - // Finished? Show the maintenance links, similar to admin_trees_manage.php |
|
| 57 | - Database::commit(); |
|
| 58 | - $controller->addInlineJavascript( |
|
| 59 | - 'jQuery("#import' . $gedcom_id . '").addClass("hidden");' . |
|
| 60 | - 'jQuery("#actions' . $gedcom_id . '").removeClass("hidden");' |
|
| 61 | - ); |
|
| 62 | - |
|
| 63 | - return; |
|
| 55 | + Tree::findById($gedcom_id)->setPreference('imported', '1'); |
|
| 56 | + // Finished? Show the maintenance links, similar to admin_trees_manage.php |
|
| 57 | + Database::commit(); |
|
| 58 | + $controller->addInlineJavascript( |
|
| 59 | + 'jQuery("#import' . $gedcom_id . '").addClass("hidden");' . |
|
| 60 | + 'jQuery("#actions' . $gedcom_id . '").removeClass("hidden");' |
|
| 61 | + ); |
|
| 62 | + |
|
| 63 | + return; |
|
| 64 | 64 | } |
| 65 | 65 | |
| 66 | 66 | // Calculate progress so far |
@@ -84,149 +84,149 @@ discard block |
||
| 84 | 84 | $first_time = ($row->import_offset == 0); |
| 85 | 85 | // Run for one second. This keeps the resource requirements low. |
| 86 | 86 | for ($end_time = microtime(true) + 1.0; microtime(true) < $end_time;) { |
| 87 | - $data = Database::prepare( |
|
| 88 | - "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
| 89 | - " FROM `##gedcom_chunk`" . |
|
| 90 | - " WHERE gedcom_id=? AND NOT imported" . |
|
| 91 | - " ORDER BY gedcom_chunk_id" . |
|
| 92 | - " LIMIT 1" |
|
| 93 | - )->execute(array($gedcom_id))->fetchOneRow(); |
|
| 94 | - // If we are loading the first (header) record, make sure the encoding is UTF-8. |
|
| 95 | - if ($first_time) { |
|
| 96 | - // Remove any byte-order-mark |
|
| 97 | - Database::prepare( |
|
| 98 | - "UPDATE `##gedcom_chunk`" . |
|
| 99 | - " SET chunk_data=TRIM(LEADING ? FROM chunk_data)" . |
|
| 100 | - " WHERE gedcom_chunk_id=?" |
|
| 101 | - )->execute(array(WT_UTF8_BOM, $data->gedcom_chunk_id)); |
|
| 102 | - // Re-fetch the data, now that we have removed the BOM |
|
| 103 | - $data = Database::prepare( |
|
| 104 | - "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
| 105 | - " FROM `##gedcom_chunk`" . |
|
| 106 | - " WHERE gedcom_chunk_id=?" |
|
| 107 | - )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
| 108 | - if (substr($data->chunk_data, 0, 6) != '0 HEAD') { |
|
| 109 | - Database::rollBack(); |
|
| 110 | - echo I18N::translate('Invalid GEDCOM file - no header record found.'); |
|
| 111 | - $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
| 112 | - |
|
| 113 | - return; |
|
| 114 | - } |
|
| 115 | - // What character set is this? Need to convert it to UTF8 |
|
| 116 | - if (preg_match('/[\r\n][ \t]*1 CHAR(?:ACTER)? (.+)/', $data->chunk_data, $match)) { |
|
| 117 | - $charset = trim(strtoupper($match[1])); |
|
| 118 | - } else { |
|
| 119 | - $charset = 'ASCII'; |
|
| 120 | - } |
|
| 121 | - // MySQL supports a wide range of collation conversions. These are ones that |
|
| 122 | - // have been encountered "in the wild". |
|
| 123 | - switch ($charset) { |
|
| 124 | - case 'ASCII': |
|
| 125 | - Database::prepare( |
|
| 126 | - "UPDATE `##gedcom_chunk`" . |
|
| 127 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING ascii) USING utf8)" . |
|
| 128 | - " WHERE gedcom_id=?" |
|
| 129 | - )->execute(array($gedcom_id)); |
|
| 130 | - break; |
|
| 131 | - case 'IBMPC': // IBMPC, IBM WINDOWS and MS-DOS could be anything. Mostly it means CP850. |
|
| 132 | - case 'IBM WINDOWS': |
|
| 133 | - case 'MS-DOS': |
|
| 134 | - case 'CP437': |
|
| 135 | - case 'CP850': |
|
| 136 | - // CP850 has extra letters with diacritics to replace box-drawing chars in CP437. |
|
| 137 | - Database::prepare( |
|
| 138 | - "UPDATE `##gedcom_chunk`" . |
|
| 139 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING cp850) USING utf8)" . |
|
| 140 | - " WHERE gedcom_id=?" |
|
| 141 | - )->execute(array($gedcom_id)); |
|
| 142 | - break; |
|
| 143 | - case 'ANSI': // ANSI could be anything. Most applications seem to treat it as latin1. |
|
| 144 | - $controller->addInlineJavascript( |
|
| 145 | - 'jQuery("#import' . $gedcom_id . '").parent().prepend("<div class=\"bg-info\">' . /* I18N: %1$s and %2$s are the names of character encodings, such as ISO-8859-1 or ASCII */ |
|
| 146 | - I18N::translate('This GEDCOM file is encoded using %1$s. Assume this to mean %2$s.', $charset, 'ISO-8859-1') . '</div>");' |
|
| 147 | - ); |
|
| 148 | - // no break; |
|
| 149 | - case 'WINDOWS': |
|
| 150 | - case 'CP1252': |
|
| 151 | - case 'ISO8859-1': |
|
| 152 | - case 'ISO-8859-1': |
|
| 153 | - case 'LATIN1': |
|
| 154 | - case 'LATIN-1': |
|
| 155 | - // Convert from ISO-8859-1 (western european) to UTF8. |
|
| 156 | - Database::prepare( |
|
| 157 | - "UPDATE `##gedcom_chunk`" . |
|
| 158 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin1) USING utf8)" . |
|
| 159 | - " WHERE gedcom_id=?" |
|
| 160 | - )->execute(array($gedcom_id)); |
|
| 161 | - break; |
|
| 162 | - case 'CP1250': |
|
| 163 | - case 'ISO8859-2': |
|
| 164 | - case 'ISO-8859-2': |
|
| 165 | - case 'LATIN2': |
|
| 166 | - case 'LATIN-2': |
|
| 167 | - // Convert from ISO-8859-2 (eastern european) to UTF8. |
|
| 168 | - Database::prepare( |
|
| 169 | - "UPDATE `##gedcom_chunk`" . |
|
| 170 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin2) USING utf8)" . |
|
| 171 | - " WHERE gedcom_id=?" |
|
| 172 | - )->execute(array($gedcom_id)); |
|
| 173 | - break; |
|
| 174 | - case 'MACINTOSH': |
|
| 175 | - // Convert from MAC Roman to UTF8. |
|
| 176 | - Database::prepare( |
|
| 177 | - "UPDATE `##gedcom_chunk`" . |
|
| 178 | - " SET chunk_data=CONVERT(CONVERT(chunk_data USING macroman) USING utf8)" . |
|
| 179 | - " WHERE gedcom_id=?" |
|
| 180 | - )->execute(array($gedcom_id)); |
|
| 181 | - break; |
|
| 182 | - case 'UTF8': |
|
| 183 | - case 'UTF-8': |
|
| 184 | - // Already UTF-8 so nothing to do! |
|
| 185 | - break; |
|
| 186 | - case 'ANSEL': |
|
| 187 | - default: |
|
| 188 | - Database::rollBack(); |
|
| 189 | - echo '<span class="error">', I18N::translate('Error: converting GEDCOM files from %s encoding to UTF-8 encoding not currently supported.', $charset), '</span>'; |
|
| 190 | - $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
| 191 | - |
|
| 192 | - return; |
|
| 193 | - } |
|
| 194 | - $first_time = false; |
|
| 195 | - |
|
| 196 | - // Re-fetch the data, now that we have performed character set conversion. |
|
| 197 | - $data = Database::prepare( |
|
| 198 | - "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
| 199 | - " FROM `##gedcom_chunk`" . |
|
| 200 | - " WHERE gedcom_chunk_id=?" |
|
| 201 | - )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
| 202 | - } |
|
| 203 | - |
|
| 204 | - if (!$data) { |
|
| 205 | - break; |
|
| 206 | - } |
|
| 207 | - try { |
|
| 208 | - // Import all the records in this chunk of data |
|
| 209 | - foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) { |
|
| 210 | - FunctionsImport::importRecord($rec, $tree, false); |
|
| 211 | - } |
|
| 212 | - // Mark the chunk as imported |
|
| 213 | - Database::prepare( |
|
| 214 | - "UPDATE `##gedcom_chunk` SET imported=TRUE WHERE gedcom_chunk_id=?" |
|
| 215 | - )->execute(array($data->gedcom_chunk_id)); |
|
| 216 | - } catch (PDOException $ex) { |
|
| 217 | - Database::rollBack(); |
|
| 218 | - if ($ex->getCode() === '40001') { |
|
| 219 | - // "SQLSTATE[40001]: Serialization failure: 1213 Deadlock found when trying to get lock; try restarting transaction" |
|
| 220 | - // The documentation says that if you get this error, wait and try again..... |
|
| 221 | - $controller->addInlineJavascript('jQuery("#import' . $gedcom_id . '").load("import.php?gedcom_id=' . $gedcom_id . '&u=' . uniqid() . '");'); |
|
| 222 | - } else { |
|
| 223 | - // A fatal error. Nothing we can do? |
|
| 224 | - echo '<span class="error">', $ex->getMessage(), '</span>'; |
|
| 225 | - $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
| 226 | - } |
|
| 227 | - |
|
| 228 | - return; |
|
| 229 | - } |
|
| 87 | + $data = Database::prepare( |
|
| 88 | + "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
| 89 | + " FROM `##gedcom_chunk`" . |
|
| 90 | + " WHERE gedcom_id=? AND NOT imported" . |
|
| 91 | + " ORDER BY gedcom_chunk_id" . |
|
| 92 | + " LIMIT 1" |
|
| 93 | + )->execute(array($gedcom_id))->fetchOneRow(); |
|
| 94 | + // If we are loading the first (header) record, make sure the encoding is UTF-8. |
|
| 95 | + if ($first_time) { |
|
| 96 | + // Remove any byte-order-mark |
|
| 97 | + Database::prepare( |
|
| 98 | + "UPDATE `##gedcom_chunk`" . |
|
| 99 | + " SET chunk_data=TRIM(LEADING ? FROM chunk_data)" . |
|
| 100 | + " WHERE gedcom_chunk_id=?" |
|
| 101 | + )->execute(array(WT_UTF8_BOM, $data->gedcom_chunk_id)); |
|
| 102 | + // Re-fetch the data, now that we have removed the BOM |
|
| 103 | + $data = Database::prepare( |
|
| 104 | + "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
| 105 | + " FROM `##gedcom_chunk`" . |
|
| 106 | + " WHERE gedcom_chunk_id=?" |
|
| 107 | + )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
| 108 | + if (substr($data->chunk_data, 0, 6) != '0 HEAD') { |
|
| 109 | + Database::rollBack(); |
|
| 110 | + echo I18N::translate('Invalid GEDCOM file - no header record found.'); |
|
| 111 | + $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
| 112 | + |
|
| 113 | + return; |
|
| 114 | + } |
|
| 115 | + // What character set is this? Need to convert it to UTF8 |
|
| 116 | + if (preg_match('/[\r\n][ \t]*1 CHAR(?:ACTER)? (.+)/', $data->chunk_data, $match)) { |
|
| 117 | + $charset = trim(strtoupper($match[1])); |
|
| 118 | + } else { |
|
| 119 | + $charset = 'ASCII'; |
|
| 120 | + } |
|
| 121 | + // MySQL supports a wide range of collation conversions. These are ones that |
|
| 122 | + // have been encountered "in the wild". |
|
| 123 | + switch ($charset) { |
|
| 124 | + case 'ASCII': |
|
| 125 | + Database::prepare( |
|
| 126 | + "UPDATE `##gedcom_chunk`" . |
|
| 127 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING ascii) USING utf8)" . |
|
| 128 | + " WHERE gedcom_id=?" |
|
| 129 | + )->execute(array($gedcom_id)); |
|
| 130 | + break; |
|
| 131 | + case 'IBMPC': // IBMPC, IBM WINDOWS and MS-DOS could be anything. Mostly it means CP850. |
|
| 132 | + case 'IBM WINDOWS': |
|
| 133 | + case 'MS-DOS': |
|
| 134 | + case 'CP437': |
|
| 135 | + case 'CP850': |
|
| 136 | + // CP850 has extra letters with diacritics to replace box-drawing chars in CP437. |
|
| 137 | + Database::prepare( |
|
| 138 | + "UPDATE `##gedcom_chunk`" . |
|
| 139 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING cp850) USING utf8)" . |
|
| 140 | + " WHERE gedcom_id=?" |
|
| 141 | + )->execute(array($gedcom_id)); |
|
| 142 | + break; |
|
| 143 | + case 'ANSI': // ANSI could be anything. Most applications seem to treat it as latin1. |
|
| 144 | + $controller->addInlineJavascript( |
|
| 145 | + 'jQuery("#import' . $gedcom_id . '").parent().prepend("<div class=\"bg-info\">' . /* I18N: %1$s and %2$s are the names of character encodings, such as ISO-8859-1 or ASCII */ |
|
| 146 | + I18N::translate('This GEDCOM file is encoded using %1$s. Assume this to mean %2$s.', $charset, 'ISO-8859-1') . '</div>");' |
|
| 147 | + ); |
|
| 148 | + // no break; |
|
| 149 | + case 'WINDOWS': |
|
| 150 | + case 'CP1252': |
|
| 151 | + case 'ISO8859-1': |
|
| 152 | + case 'ISO-8859-1': |
|
| 153 | + case 'LATIN1': |
|
| 154 | + case 'LATIN-1': |
|
| 155 | + // Convert from ISO-8859-1 (western european) to UTF8. |
|
| 156 | + Database::prepare( |
|
| 157 | + "UPDATE `##gedcom_chunk`" . |
|
| 158 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin1) USING utf8)" . |
|
| 159 | + " WHERE gedcom_id=?" |
|
| 160 | + )->execute(array($gedcom_id)); |
|
| 161 | + break; |
|
| 162 | + case 'CP1250': |
|
| 163 | + case 'ISO8859-2': |
|
| 164 | + case 'ISO-8859-2': |
|
| 165 | + case 'LATIN2': |
|
| 166 | + case 'LATIN-2': |
|
| 167 | + // Convert from ISO-8859-2 (eastern european) to UTF8. |
|
| 168 | + Database::prepare( |
|
| 169 | + "UPDATE `##gedcom_chunk`" . |
|
| 170 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING latin2) USING utf8)" . |
|
| 171 | + " WHERE gedcom_id=?" |
|
| 172 | + )->execute(array($gedcom_id)); |
|
| 173 | + break; |
|
| 174 | + case 'MACINTOSH': |
|
| 175 | + // Convert from MAC Roman to UTF8. |
|
| 176 | + Database::prepare( |
|
| 177 | + "UPDATE `##gedcom_chunk`" . |
|
| 178 | + " SET chunk_data=CONVERT(CONVERT(chunk_data USING macroman) USING utf8)" . |
|
| 179 | + " WHERE gedcom_id=?" |
|
| 180 | + )->execute(array($gedcom_id)); |
|
| 181 | + break; |
|
| 182 | + case 'UTF8': |
|
| 183 | + case 'UTF-8': |
|
| 184 | + // Already UTF-8 so nothing to do! |
|
| 185 | + break; |
|
| 186 | + case 'ANSEL': |
|
| 187 | + default: |
|
| 188 | + Database::rollBack(); |
|
| 189 | + echo '<span class="error">', I18N::translate('Error: converting GEDCOM files from %s encoding to UTF-8 encoding not currently supported.', $charset), '</span>'; |
|
| 190 | + $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
| 191 | + |
|
| 192 | + return; |
|
| 193 | + } |
|
| 194 | + $first_time = false; |
|
| 195 | + |
|
| 196 | + // Re-fetch the data, now that we have performed character set conversion. |
|
| 197 | + $data = Database::prepare( |
|
| 198 | + "SELECT gedcom_chunk_id, REPLACE(chunk_data, '\r', '\n') AS chunk_data" . |
|
| 199 | + " FROM `##gedcom_chunk`" . |
|
| 200 | + " WHERE gedcom_chunk_id=?" |
|
| 201 | + )->execute(array($data->gedcom_chunk_id))->fetchOneRow(); |
|
| 202 | + } |
|
| 203 | + |
|
| 204 | + if (!$data) { |
|
| 205 | + break; |
|
| 206 | + } |
|
| 207 | + try { |
|
| 208 | + // Import all the records in this chunk of data |
|
| 209 | + foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) { |
|
| 210 | + FunctionsImport::importRecord($rec, $tree, false); |
|
| 211 | + } |
|
| 212 | + // Mark the chunk as imported |
|
| 213 | + Database::prepare( |
|
| 214 | + "UPDATE `##gedcom_chunk` SET imported=TRUE WHERE gedcom_chunk_id=?" |
|
| 215 | + )->execute(array($data->gedcom_chunk_id)); |
|
| 216 | + } catch (PDOException $ex) { |
|
| 217 | + Database::rollBack(); |
|
| 218 | + if ($ex->getCode() === '40001') { |
|
| 219 | + // "SQLSTATE[40001]: Serialization failure: 1213 Deadlock found when trying to get lock; try restarting transaction" |
|
| 220 | + // The documentation says that if you get this error, wait and try again..... |
|
| 221 | + $controller->addInlineJavascript('jQuery("#import' . $gedcom_id . '").load("import.php?gedcom_id=' . $gedcom_id . '&u=' . uniqid() . '");'); |
|
| 222 | + } else { |
|
| 223 | + // A fatal error. Nothing we can do? |
|
| 224 | + echo '<span class="error">', $ex->getMessage(), '</span>'; |
|
| 225 | + $controller->addInlineJavascript('jQuery("#actions' . $gedcom_id . '").removeClass("hidden");'); |
|
| 226 | + } |
|
| 227 | + |
|
| 228 | + return; |
|
| 229 | + } |
|
| 230 | 230 | } |
| 231 | 231 | |
| 232 | 232 | Database::commit(); |