mattboyer /
sqbrite
| 1 | # MIT License |
||
| 2 | # |
||
| 3 | # Copyright (c) 2017 Matt Boyer |
||
| 4 | # |
||
| 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
||
| 6 | # of this software and associated documentation files (the "Software"), to deal |
||
| 7 | # in the Software without restriction, including without limitation the rights |
||
| 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||
| 9 | # copies of the Software, and to permit persons to whom the Software is |
||
| 10 | # furnished to do so, subject to the following conditions: |
||
| 11 | # |
||
| 12 | # The above copyright notice and this permission notice shall be included in |
||
| 13 | # all copies or substantial portions of the Software. |
||
| 14 | # |
||
| 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
| 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
| 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||
| 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
| 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
| 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
| 21 | # SOFTWARE. |
||
| 22 | |||
| 23 | import csv |
||
| 24 | import os |
||
| 25 | import tempfile |
||
| 26 | |||
| 27 | from . import _LOGGER |
||
| 28 | from .record import Record |
||
| 29 | from .pages import BTreePage |
||
| 30 | |||
| 31 | |||
| 32 | View Code Duplication | class Table(object): |
|
|
0 ignored issues
–
show
Duplication
introduced
by
Loading history...
|
|||
| 33 | def __init__(self, name, db, rootpage, signatures): |
||
| 34 | self._name = name |
||
| 35 | self._db = db |
||
| 36 | self._signatures = signatures |
||
| 37 | assert(isinstance(rootpage, BTreePage)) |
||
| 38 | self._root = rootpage |
||
| 39 | self._leaves = [] |
||
| 40 | try: |
||
| 41 | self._columns = self._db.table_columns[self.name] |
||
| 42 | except KeyError: |
||
| 43 | self._columns = None |
||
| 44 | |||
| 45 | # We want this to be a list of leaf-type pages, sorted in the order of |
||
| 46 | # their smallest rowid |
||
| 47 | self._populate_pages() |
||
| 48 | |||
| 49 | @property |
||
| 50 | def name(self): |
||
| 51 | return self._name |
||
| 52 | |||
| 53 | def add_leaf(self, leaf_page): |
||
| 54 | self._leaves.append(leaf_page) |
||
| 55 | |||
| 56 | @property |
||
| 57 | def columns(self): |
||
| 58 | return self._columns |
||
| 59 | |||
| 60 | def __repr__(self): |
||
| 61 | return "<SQLite table \"{}\", root: {}, leaves: {}>".format( |
||
| 62 | self.name, self._root.idx, len(self._leaves) |
||
| 63 | ) |
||
| 64 | |||
| 65 | def _populate_pages(self): |
||
| 66 | _LOGGER.info("Page %d is root for %s", self._root.idx, self.name) |
||
| 67 | table_pages = [self._root] |
||
| 68 | |||
| 69 | if self._root.btree_header.right_most_page_idx is not None: |
||
| 70 | rightmost_idx = self._root.btree_header.right_most_page_idx |
||
| 71 | rightmost_page = self._db.pages[rightmost_idx] |
||
| 72 | if rightmost_page is not self._root: |
||
| 73 | _LOGGER.info( |
||
| 74 | "Page %d is rightmost for %s", |
||
| 75 | rightmost_idx, self.name |
||
| 76 | ) |
||
| 77 | table_pages.append(rightmost_page) |
||
| 78 | |||
| 79 | page_queue = list(table_pages) |
||
| 80 | while page_queue: |
||
| 81 | table_page = page_queue.pop(0) |
||
| 82 | # table_pages is initialised with the table's rootpage, which |
||
| 83 | # may be a leaf page for a very small table |
||
| 84 | if table_page.page_type != 'Table Interior': |
||
| 85 | self._leaves.append(table_page) |
||
| 86 | continue |
||
| 87 | |||
| 88 | for cell_idx in table_page.cells: |
||
| 89 | page_ptr, max_row_in_page = table_page.cells[cell_idx] |
||
| 90 | |||
| 91 | page = self._db.pages[page_ptr] |
||
| 92 | _LOGGER.debug("B-Tree cell: (%r, %d)", page, max_row_in_page) |
||
| 93 | table_pages.append(page) |
||
| 94 | if page.page_type == 'Table Interior': |
||
| 95 | page_queue.append(page) |
||
| 96 | elif page.page_type == 'Table Leaf': |
||
| 97 | self._leaves.append(page) |
||
| 98 | |||
| 99 | assert(all(p.page_type == 'Table Leaf' for p in self._leaves)) |
||
| 100 | for page in table_pages: |
||
| 101 | self._db.map_table_page(page.idx, self) |
||
| 102 | |||
| 103 | @property |
||
| 104 | def leaves(self): |
||
| 105 | for leaf_page in self._leaves: |
||
| 106 | yield leaf_page |
||
| 107 | |||
| 108 | def recover_records(self, grouping): |
||
| 109 | for page in self.leaves: |
||
| 110 | assert isinstance(page, BTreePage) |
||
| 111 | if not page.freeblocks: |
||
| 112 | continue |
||
| 113 | |||
| 114 | _LOGGER.info("%r", page) |
||
| 115 | page.recover_freeblock_records(grouping) |
||
| 116 | page.print_recovered_records() |
||
| 117 | |||
| 118 | def csv_dump(self, out_dir): |
||
| 119 | csv_path = os.path.join(out_dir, self.name + '.csv') |
||
| 120 | if os.path.exists(csv_path): |
||
| 121 | raise ValueError("Output file {} exists!".format(csv_path)) |
||
| 122 | |||
| 123 | _LOGGER.info("Dumping table \"%s\" to CSV", self.name) |
||
| 124 | with tempfile.TemporaryFile('w+', newline='') as csv_temp: |
||
| 125 | writer = csv.DictWriter(csv_temp, fieldnames=self._columns) |
||
| 126 | writer.writeheader() |
||
| 127 | |||
| 128 | for leaf_page in self.leaves: |
||
| 129 | for cell_idx in leaf_page.cells: |
||
| 130 | rowid, record = leaf_page.cells[cell_idx] |
||
| 131 | # assert(self.check_signature(record)) |
||
| 132 | |||
| 133 | _LOGGER.debug('Record %d: %r', rowid, record.header) |
||
| 134 | fields_iter = ( |
||
| 135 | repr(record.fields[idx]) for idx in record.fields |
||
|
0 ignored issues
–
show
|
|||
| 136 | ) |
||
| 137 | _LOGGER.debug(', '.join(fields_iter)) |
||
| 138 | |||
| 139 | values_iter = ( |
||
| 140 | record.fields[idx].value for idx in record.fields |
||
| 141 | ) |
||
| 142 | writer.writerow(dict(zip(self._columns, values_iter))) |
||
| 143 | |||
| 144 | if not leaf_page.recovered_records: |
||
| 145 | continue |
||
| 146 | |||
| 147 | # Recovered records are in an unordered set because their rowid |
||
| 148 | # has been lost, making sorting impossible |
||
| 149 | for record in leaf_page.recovered_records: |
||
| 150 | values_iter = ( |
||
| 151 | record.fields[idx].value for idx in record.fields |
||
| 152 | ) |
||
| 153 | writer.writerow(dict(zip(self._columns, values_iter))) |
||
| 154 | |||
| 155 | if csv_temp.tell() > 0: |
||
| 156 | csv_temp.seek(0) |
||
| 157 | with open(csv_path, 'w', encoding='UTF8') as csv_file: |
||
| 158 | csv_file.write(csv_temp.read()) |
||
| 159 | |||
| 160 | def build_insert_SQL(self, record): |
||
| 161 | column_placeholders = ( |
||
| 162 | ':' + col_name for col_name in self._columns |
||
|
0 ignored issues
–
show
Comprehensibility
Best Practice
introduced
by
|
|||
| 163 | ) |
||
| 164 | insert_statement = 'INSERT INTO {} VALUES ({})'.format( |
||
| 165 | self.name, |
||
| 166 | ', '.join(c for c in column_placeholders), |
||
| 167 | ) |
||
| 168 | value_kwargs = {} |
||
| 169 | for col_idx, col_name in enumerate(self._columns): |
||
| 170 | try: |
||
| 171 | if record.fields[col_idx].value is None: |
||
| 172 | value_kwargs[col_name] = None |
||
| 173 | else: |
||
| 174 | value_kwargs[col_name] = record.fields[col_idx].value |
||
| 175 | except KeyError: |
||
| 176 | value_kwargs[col_name] = None |
||
| 177 | |||
| 178 | return insert_statement, value_kwargs |
||
| 179 | |||
| 180 | def check_signature(self, record): |
||
| 181 | assert isinstance(record, Record) |
||
| 182 | try: |
||
| 183 | sig = self._signatures[self.name] |
||
| 184 | except KeyError: |
||
| 185 | # The sqlite schema tables don't have a signature (or need one) |
||
| 186 | return True |
||
| 187 | if len(record.fields) > len(self.columns): |
||
| 188 | return False |
||
| 189 | |||
| 190 | # It's OK for a record to have fewer fields than there are columns in |
||
| 191 | # this table, this is seen when NULLable or default-valued columns are |
||
| 192 | # added in an ALTER TABLE statement. |
||
| 193 | for field_idx, field in record.fields.items(): |
||
| 194 | # NULL can be a value for any column type |
||
| 195 | if field.value is None: |
||
| 196 | continue |
||
| 197 | if not isinstance(field.value, sig[field_idx]): |
||
| 198 | return False |
||
| 199 | return True |
||
| 200 |