1 | # MIT License |
||
2 | # |
||
3 | # Copyright (c) 2017 Matt Boyer |
||
4 | # |
||
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy |
||
6 | # of this software and associated documentation files (the "Software"), to deal |
||
7 | # in the Software without restriction, including without limitation the rights |
||
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||
9 | # copies of the Software, and to permit persons to whom the Software is |
||
10 | # furnished to do so, subject to the following conditions: |
||
11 | # |
||
12 | # The above copyright notice and this permission notice shall be included in |
||
13 | # all copies or substantial portions of the Software. |
||
14 | # |
||
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
21 | # SOFTWARE. |
||
22 | |||
23 | import csv |
||
24 | import os |
||
25 | import tempfile |
||
26 | |||
27 | from . import _LOGGER |
||
28 | from .record import Record |
||
29 | from .pages import BTreePage |
||
30 | |||
31 | |||
32 | View Code Duplication | class Table(object): |
|
0 ignored issues
–
show
Duplication
introduced
by
Loading history...
|
|||
33 | def __init__(self, name, db, rootpage, signatures): |
||
34 | self._name = name |
||
35 | self._db = db |
||
36 | self._signatures = signatures |
||
37 | assert(isinstance(rootpage, BTreePage)) |
||
38 | self._root = rootpage |
||
39 | self._leaves = [] |
||
40 | try: |
||
41 | self._columns = self._db.table_columns[self.name] |
||
42 | except KeyError: |
||
43 | self._columns = None |
||
44 | |||
45 | # We want this to be a list of leaf-type pages, sorted in the order of |
||
46 | # their smallest rowid |
||
47 | self._populate_pages() |
||
48 | |||
49 | @property |
||
50 | def name(self): |
||
51 | return self._name |
||
52 | |||
53 | def add_leaf(self, leaf_page): |
||
54 | self._leaves.append(leaf_page) |
||
55 | |||
56 | @property |
||
57 | def columns(self): |
||
58 | return self._columns |
||
59 | |||
60 | def __repr__(self): |
||
61 | return "<SQLite table \"{}\", root: {}, leaves: {}>".format( |
||
62 | self.name, self._root.idx, len(self._leaves) |
||
63 | ) |
||
64 | |||
65 | def _populate_pages(self): |
||
66 | _LOGGER.info("Page %d is root for %s", self._root.idx, self.name) |
||
67 | table_pages = [self._root] |
||
68 | |||
69 | if self._root.btree_header.right_most_page_idx is not None: |
||
70 | rightmost_idx = self._root.btree_header.right_most_page_idx |
||
71 | rightmost_page = self._db.pages[rightmost_idx] |
||
72 | if rightmost_page is not self._root: |
||
73 | _LOGGER.info( |
||
74 | "Page %d is rightmost for %s", |
||
75 | rightmost_idx, self.name |
||
76 | ) |
||
77 | table_pages.append(rightmost_page) |
||
78 | |||
79 | page_queue = list(table_pages) |
||
80 | while page_queue: |
||
81 | table_page = page_queue.pop(0) |
||
82 | # table_pages is initialised with the table's rootpage, which |
||
83 | # may be a leaf page for a very small table |
||
84 | if table_page.page_type != 'Table Interior': |
||
85 | self._leaves.append(table_page) |
||
86 | continue |
||
87 | |||
88 | for cell_idx in table_page.cells: |
||
89 | page_ptr, max_row_in_page = table_page.cells[cell_idx] |
||
90 | |||
91 | page = self._db.pages[page_ptr] |
||
92 | _LOGGER.debug("B-Tree cell: (%r, %d)", page, max_row_in_page) |
||
93 | table_pages.append(page) |
||
94 | if page.page_type == 'Table Interior': |
||
95 | page_queue.append(page) |
||
96 | elif page.page_type == 'Table Leaf': |
||
97 | self._leaves.append(page) |
||
98 | |||
99 | assert(all(p.page_type == 'Table Leaf' for p in self._leaves)) |
||
100 | for page in table_pages: |
||
101 | self._db.map_table_page(page.idx, self) |
||
102 | |||
103 | @property |
||
104 | def leaves(self): |
||
105 | for leaf_page in self._leaves: |
||
106 | yield leaf_page |
||
107 | |||
108 | def recover_records(self, grouping): |
||
109 | for page in self.leaves: |
||
110 | assert isinstance(page, BTreePage) |
||
111 | if not page.freeblocks: |
||
112 | continue |
||
113 | |||
114 | _LOGGER.info("%r", page) |
||
115 | page.recover_freeblock_records(grouping) |
||
116 | page.print_recovered_records() |
||
117 | |||
118 | def csv_dump(self, out_dir): |
||
119 | csv_path = os.path.join(out_dir, self.name + '.csv') |
||
120 | if os.path.exists(csv_path): |
||
121 | raise ValueError("Output file {} exists!".format(csv_path)) |
||
122 | |||
123 | _LOGGER.info("Dumping table \"%s\" to CSV", self.name) |
||
124 | with tempfile.TemporaryFile('w+', newline='') as csv_temp: |
||
125 | writer = csv.DictWriter(csv_temp, fieldnames=self._columns) |
||
126 | writer.writeheader() |
||
127 | |||
128 | for leaf_page in self.leaves: |
||
129 | for cell_idx in leaf_page.cells: |
||
130 | rowid, record = leaf_page.cells[cell_idx] |
||
131 | # assert(self.check_signature(record)) |
||
132 | |||
133 | _LOGGER.debug('Record %d: %r', rowid, record.header) |
||
134 | fields_iter = ( |
||
135 | repr(record.fields[idx]) for idx in record.fields |
||
0 ignored issues
–
show
|
|||
136 | ) |
||
137 | _LOGGER.debug(', '.join(fields_iter)) |
||
138 | |||
139 | values_iter = ( |
||
140 | record.fields[idx].value for idx in record.fields |
||
141 | ) |
||
142 | writer.writerow(dict(zip(self._columns, values_iter))) |
||
143 | |||
144 | if not leaf_page.recovered_records: |
||
145 | continue |
||
146 | |||
147 | # Recovered records are in an unordered set because their rowid |
||
148 | # has been lost, making sorting impossible |
||
149 | for record in leaf_page.recovered_records: |
||
150 | values_iter = ( |
||
151 | record.fields[idx].value for idx in record.fields |
||
152 | ) |
||
153 | writer.writerow(dict(zip(self._columns, values_iter))) |
||
154 | |||
155 | if csv_temp.tell() > 0: |
||
156 | csv_temp.seek(0) |
||
157 | with open(csv_path, 'w', encoding='UTF8') as csv_file: |
||
158 | csv_file.write(csv_temp.read()) |
||
159 | |||
160 | def build_insert_SQL(self, record): |
||
161 | column_placeholders = ( |
||
162 | ':' + col_name for col_name in self._columns |
||
0 ignored issues
–
show
Comprehensibility
Best Practice
introduced
by
|
|||
163 | ) |
||
164 | insert_statement = 'INSERT INTO {} VALUES ({})'.format( |
||
165 | self.name, |
||
166 | ', '.join(c for c in column_placeholders), |
||
167 | ) |
||
168 | value_kwargs = {} |
||
169 | for col_idx, col_name in enumerate(self._columns): |
||
170 | try: |
||
171 | if record.fields[col_idx].value is None: |
||
172 | value_kwargs[col_name] = None |
||
173 | else: |
||
174 | value_kwargs[col_name] = record.fields[col_idx].value |
||
175 | except KeyError: |
||
176 | value_kwargs[col_name] = None |
||
177 | |||
178 | return insert_statement, value_kwargs |
||
179 | |||
180 | def check_signature(self, record): |
||
181 | assert isinstance(record, Record) |
||
182 | try: |
||
183 | sig = self._signatures[self.name] |
||
184 | except KeyError: |
||
185 | # The sqlite schema tables don't have a signature (or need one) |
||
186 | return True |
||
187 | if len(record.fields) > len(self.columns): |
||
188 | return False |
||
189 | |||
190 | # It's OK for a record to have fewer fields than there are columns in |
||
191 | # this table, this is seen when NULLable or default-valued columns are |
||
192 | # added in an ALTER TABLE statement. |
||
193 | for field_idx, field in record.fields.items(): |
||
194 | # NULL can be a value for any column type |
||
195 | if field.value is None: |
||
196 | continue |
||
197 | if not isinstance(field.value, sig[field_idx]): |
||
198 | return False |
||
199 | return True |
||
200 |