Issues (17)

src/table.py (3 issues)

1
# MIT License
2
#
3
# Copyright (c) 2017 Matt Boyer
4
#
5
# Permission is hereby granted, free of charge, to any person obtaining a copy
6
# of this software and associated documentation files (the "Software"), to deal
7
# in the Software without restriction, including without limitation the rights
8
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
# copies of the Software, and to permit persons to whom the Software is
10
# furnished to do so, subject to the following conditions:
11
#
12
# The above copyright notice and this permission notice shall be included in
13
# all copies or substantial portions of the Software.
14
#
15
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
# SOFTWARE.
22
23
import csv
24
import os
25
import tempfile
26
27
from . import _LOGGER
28
from .record import Record
29
from .pages import BTreePage
30
31
32 View Code Duplication
class Table(object):
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
33
    def __init__(self, name, db, rootpage, signatures):
34
        self._name = name
35
        self._db = db
36
        self._signatures = signatures
37
        assert(isinstance(rootpage, BTreePage))
38
        self._root = rootpage
39
        self._leaves = []
40
        try:
41
            self._columns = self._db.table_columns[self.name]
42
        except KeyError:
43
            self._columns = None
44
45
        # We want this to be a list of leaf-type pages, sorted in the order of
46
        # their smallest rowid
47
        self._populate_pages()
48
49
    @property
50
    def name(self):
51
        return self._name
52
53
    def add_leaf(self, leaf_page):
54
        self._leaves.append(leaf_page)
55
56
    @property
57
    def columns(self):
58
        return self._columns
59
60
    def __repr__(self):
61
        return "<SQLite table \"{}\", root: {}, leaves: {}>".format(
62
            self.name, self._root.idx, len(self._leaves)
63
        )
64
65
    def _populate_pages(self):
66
        _LOGGER.info("Page %d is root for %s", self._root.idx, self.name)
67
        table_pages = [self._root]
68
69
        if self._root.btree_header.right_most_page_idx is not None:
70
            rightmost_idx = self._root.btree_header.right_most_page_idx
71
            rightmost_page = self._db.pages[rightmost_idx]
72
            if rightmost_page is not self._root:
73
                _LOGGER.info(
74
                    "Page %d is rightmost for %s",
75
                    rightmost_idx, self.name
76
                )
77
                table_pages.append(rightmost_page)
78
79
        page_queue = list(table_pages)
80
        while page_queue:
81
            table_page = page_queue.pop(0)
82
            # table_pages is initialised with the table's rootpage, which
83
            # may be a leaf page for a very small table
84
            if table_page.page_type != 'Table Interior':
85
                self._leaves.append(table_page)
86
                continue
87
88
            for cell_idx in table_page.cells:
89
                page_ptr, max_row_in_page = table_page.cells[cell_idx]
90
91
                page = self._db.pages[page_ptr]
92
                _LOGGER.debug("B-Tree cell: (%r, %d)", page, max_row_in_page)
93
                table_pages.append(page)
94
                if page.page_type == 'Table Interior':
95
                    page_queue.append(page)
96
                elif page.page_type == 'Table Leaf':
97
                    self._leaves.append(page)
98
99
        assert(all(p.page_type == 'Table Leaf' for p in self._leaves))
100
        for page in table_pages:
101
            self._db.map_table_page(page.idx, self)
102
103
    @property
104
    def leaves(self):
105
        for leaf_page in self._leaves:
106
            yield leaf_page
107
108
    def recover_records(self, grouping):
109
        for page in self.leaves:
110
            assert isinstance(page, BTreePage)
111
            if not page.freeblocks:
112
                continue
113
114
            _LOGGER.info("%r", page)
115
            page.recover_freeblock_records(grouping)
116
            page.print_recovered_records()
117
118
    def csv_dump(self, out_dir):
119
        csv_path = os.path.join(out_dir, self.name + '.csv')
120
        if os.path.exists(csv_path):
121
            raise ValueError("Output file {} exists!".format(csv_path))
122
123
        _LOGGER.info("Dumping table \"%s\" to CSV", self.name)
124
        with tempfile.TemporaryFile('w+', newline='') as csv_temp:
125
            writer = csv.DictWriter(csv_temp, fieldnames=self._columns)
126
            writer.writeheader()
127
128
            for leaf_page in self.leaves:
129
                for cell_idx in leaf_page.cells:
130
                    rowid, record = leaf_page.cells[cell_idx]
131
                    # assert(self.check_signature(record))
132
133
                    _LOGGER.debug('Record %d: %r', rowid, record.header)
134
                    fields_iter = (
135
                        repr(record.fields[idx]) for idx in record.fields
0 ignored issues
show
The variable idx does not seem to be defined for all execution paths.
Loading history...
136
                    )
137
                    _LOGGER.debug(', '.join(fields_iter))
138
139
                    values_iter = (
140
                        record.fields[idx].value for idx in record.fields
141
                    )
142
                    writer.writerow(dict(zip(self._columns, values_iter)))
143
144
                if not leaf_page.recovered_records:
145
                    continue
146
147
                # Recovered records are in an unordered set because their rowid
148
                # has been lost, making sorting impossible
149
                for record in leaf_page.recovered_records:
150
                    values_iter = (
151
                        record.fields[idx].value for idx in record.fields
152
                    )
153
                    writer.writerow(dict(zip(self._columns, values_iter)))
154
155
            if csv_temp.tell() > 0:
156
                csv_temp.seek(0)
157
                with open(csv_path, 'w', encoding='UTF8') as csv_file:
158
                    csv_file.write(csv_temp.read())
159
160
    def build_insert_SQL(self, record):
161
        column_placeholders = (
162
            ':' + col_name for col_name in self._columns
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable col_name does not seem to be defined.
Loading history...
163
        )
164
        insert_statement = 'INSERT INTO {} VALUES ({})'.format(
165
            self.name,
166
            ', '.join(c for c in column_placeholders),
167
        )
168
        value_kwargs = {}
169
        for col_idx, col_name in enumerate(self._columns):
170
            try:
171
                if record.fields[col_idx].value is None:
172
                    value_kwargs[col_name] = None
173
                else:
174
                    value_kwargs[col_name] = record.fields[col_idx].value
175
            except KeyError:
176
                value_kwargs[col_name] = None
177
178
        return insert_statement, value_kwargs
179
180
    def check_signature(self, record):
181
        assert isinstance(record, Record)
182
        try:
183
            sig = self._signatures[self.name]
184
        except KeyError:
185
            # The sqlite schema tables don't have a signature (or need one)
186
            return True
187
        if len(record.fields) > len(self.columns):
188
            return False
189
190
        # It's OK for a record to have fewer fields than there are columns in
191
        # this table, this is seen when NULLable or default-valued columns are
192
        # added in an ALTER TABLE statement.
193
        for field_idx, field in record.fields.items():
194
            # NULL can be a value for any column type
195
            if field.value is None:
196
                continue
197
            if not isinstance(field.value, sig[field_idx]):
198
                return False
199
        return True
200