Passed
Pull Request — master (#7)
by Matt
01:37
created

Field._check_length()   A

Complexity

Conditions 2

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 3
rs 10
1
# MIT License
0 ignored issues
show
coding-style introduced by
Too many lines in module (1649/1000)
Loading history...
Coding Style introduced by
This module should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below is an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend reading PEP 257: Docstring Conventions.

Loading history...
2
#
3
# Copyright (c) 2017 Matt Boyer
4
#
5
# Permission is hereby granted, free of charge, to any person obtaining a copy
6
# of this software and associated documentation files (the "Software"), to deal
7
# in the Software without restriction, including without limitation the rights
8
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
# copies of the Software, and to permit persons to whom the Software is
10
# furnished to do so, subject to the following conditions:
11
#
12
# The above copyright notice and this permission notice shall be included in
13
# all copies or substantial portions of the Software.
14
#
15
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
# SOFTWARE.
22
23
from . import constants
24
from . import PROJECT_NAME, PROJECT_DESCRIPTION, USER_JSON_PATH, BUILTIN_JSON
25
26
import argparse
0 ignored issues
show
introduced by
standard import "import argparse" should be placed before "from . import constants"
Loading history...
27
import base64
0 ignored issues
show
introduced by
standard import "import base64" should be placed before "from . import constants"
Loading history...
28
import collections
0 ignored issues
show
introduced by
standard import "import collections" should be placed before "from . import constants"
Loading history...
29
import csv
0 ignored issues
show
introduced by
standard import "import csv" should be placed before "from . import constants"
Loading history...
30
import json
0 ignored issues
show
introduced by
standard import "import json" should be placed before "from . import constants"
Loading history...
31
import logging
0 ignored issues
show
introduced by
standard import "import logging" should be placed before "from . import constants"
Loading history...
32
import os
0 ignored issues
show
introduced by
standard import "import os" should be placed before "from . import constants"
Loading history...
33
import os.path
0 ignored issues
show
introduced by
standard import "import os.path" should be placed before "from . import constants"
Loading history...
34
import pdb
0 ignored issues
show
introduced by
standard import "import pdb" should be placed before "from . import constants"
Loading history...
35
import pkg_resources
0 ignored issues
show
introduced by
external import "import pkg_resources" should be placed before "from . import constants"
Loading history...
36
import re
0 ignored issues
show
introduced by
standard import "import re" should be placed before "import pkg_resources"
Loading history...
37
import shutil
0 ignored issues
show
introduced by
standard import "import shutil" should be placed before "import pkg_resources"
Loading history...
38
import sqlite3
0 ignored issues
show
introduced by
standard import "import sqlite3" should be placed before "import pkg_resources"
Loading history...
39
import stat
0 ignored issues
show
introduced by
standard import "import stat" should be placed before "import pkg_resources"
Loading history...
40
import struct
0 ignored issues
show
introduced by
standard import "import struct" should be placed before "import pkg_resources"
Loading history...
41
import tempfile
0 ignored issues
show
introduced by
standard import "import tempfile" should be placed before "import pkg_resources"
Loading history...
42
43
44
# Module-wide logging: records are timestamped and tagged with their level,
# and this module's own logger reports INFO and above.
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s')
_LOGGER = logging.getLogger('SQLite recovery')
_LOGGER.setLevel(logging.INFO)
47
48
49
# Parsed form of the database header. The field order matters: it is the
# order in which SQLite_DB.parse_header() unpacks the values from the first
# 100 bytes of the file.
SQLite_header = collections.namedtuple(
    'SQLite_header',
    [
        'magic',
        'page_size',
        'write_format',
        'read_format',
        'reserved_length',
        'max_payload_fraction',
        'min_payload_fraction',
        'leaf_payload_fraction',
        'file_change_counter',
        'size_in_pages',
        'first_freelist_trunk',
        'freelist_pages',
        'schema_cookie',
        'schema_format',
        'default_page_cache_size',
        'largest_btree_page',
        'text_encoding',
        'user_version',
        'incremental_vacuum',
        'application_id',
        'version_valid',
        'sqlite_version',
    ],
)
73
74
75
# Header values of a single b-tree page.
# NOTE(review): the code that fills this in is outside this section; the
# field order presumably follows the on-disk page header -- confirm.
SQLite_btree_page_header = collections.namedtuple(
    'SQLite_btree_page_header',
    [
        'page_type',
        'first_freeblock_offset',
        'num_cells',
        'cell_content_offset',
        'num_fragmented_free_bytes',
        'right_most_page_idx',
    ],
)
83
84
85
# One pointer-map entry: the page it describes, that page's type and the
# pointer value stored alongside it (see SQLite_DB.populate_ptrmap_pages).
SQLite_ptrmap_info = collections.namedtuple(
    'SQLite_ptrmap_info',
    [
        'page_idx',
        'page_type',
        'page_ptr',
    ],
)
90
91
92
# Description of one field of a record.
# NOTE(review): presumably one instance per column value; the code that
# builds these is outside this section -- confirm.
SQLite_record_field = collections.namedtuple(
    'SQLite_record_field',
    [
        'col_type',
        'col_type_descr',
        'field_length',
        'field_bytes',
    ],
)
98
99
100
# One row of the master (schema) table.
# NOTE(review): field names look like the sqlite_master columns -- confirm
# against the code that instantiates this tuple.
SQLite_master_record = collections.namedtuple(
    'SQLite_master_record',
    [
        'type',
        'name',
        'tbl_name',
        'rootpage',
        'sql',
    ],
)
107
108
109
# Registry of heuristic callables keyed by table name; filled in by
# load_heuristics() with the closures built by heuristic_factory().
heuristics = {}
0 ignored issues
show
Coding Style Naming introduced by
The name heuristics does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$).

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
110
111
112
def heuristic_factory(magic, offset):
    """Build a heuristic that proposes header positions inside a freeblock.

    Args:
        magic: bytes object compiled as a regular expression and searched
            for in the freeblock.
        offset: non-negative int subtracted from the start of every match
            to obtain the candidate header position.

    Returns:
        A generator function taking the freeblock bytes and yielding
        candidate header offsets, starting from the last match.
    """
    assert isinstance(magic, bytes)
    assert isinstance(offset, int)
    assert offset >= 0

    # Compiled once here so that every call of the closure reuses it
    pattern = re.compile(magic)

    def generic_heuristic(freeblock_bytes):
        """Yield candidate header offsets, walking the matches backwards."""
        match_starts = [
            found.start() for found in pattern.finditer(freeblock_bytes)
        ]
        for match_start in reversed(match_starts):
            candidate = match_start - offset
            if candidate < 0:
                # Matches are visited from last to first, so every remaining
                # one would fall outside the freeblock as well
                _LOGGER.debug("Header start outside of freeblock!")
                return
            yield candidate

    return generic_heuristic
129
130
131
def load_heuristics():
    """Fill the module-level ``heuristics`` registry from JSON definitions.

    The definitions bundled with the package are loaded first. If a file
    exists at USER_JSON_PATH it is loaded afterwards, so its entries replace
    bundled ones that share the same table name.
    """

    def _load_from_json(raw_json):
        """Register one heuristic per table described in *raw_json*.

        The document maps each table name to an object holding a
        base64-encoded ``magic`` and an integer ``offset``.
        """
        if isinstance(raw_json, bytes):
            raw_json = raw_json.decode('utf-8')
        for table_name, heuristic_params in json.loads(raw_json).items():
            magic = base64.standard_b64decode(
                heuristic_params['magic']
            )
            heuristics[table_name] = heuristic_factory(
                magic, heuristic_params['offset']
            )
            _LOGGER.debug("Loaded heuristics for \"%s\"", table_name)

    with pkg_resources.resource_stream(PROJECT_NAME, BUILTIN_JSON) as builtin:
        _load_from_json(builtin.read())

    if not os.path.exists(USER_JSON_PATH):
        return
    # Read the user file as UTF-8 explicitly, matching how the bundled
    # definitions are decoded, instead of relying on the locale's default
    with open(USER_JSON_PATH, 'r', encoding='utf-8') as user_json:
        _load_from_json(user_json.read())
152
153
154
class IndexDict(dict):
    """Dictionary whose iteration visits its keys in ascending order."""

    def __iter__(self):
        # Sort the keys view rather than self: sorted(self) would call
        # straight back into this method
        yield from sorted(self.keys())
158
159
160
class SQLite_DB(object):
0 ignored issues
show
Coding Style Naming introduced by
The name SQLite_DB does not conform to the class naming conventions ([A-Z_][a-zA-Z0-9]+$).

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
Coding Style introduced by
This class should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
161
    def __init__(self, path):
        """Parse the header of the database at *path* and cache its pages.

        Args:
            path: filesystem path of the SQLite database file.
        """
        self._path = path
        # parse_header() may record the first freelist trunk page in this
        # dict, so it has to exist before the call
        self._page_types = {}
        self._header = self.parse_header()

        # Raw bytes of every page, filled in by build_page_cache()
        self._page_cache = None
        # Actual page objects go here
        self._pages = {}
        self.build_page_cache()

        self._ptrmap = {}

        # TODO Do we need all of these?
        self._table_roots = {}
        self._page_tables = {}
        self._tables = {}
        self._table_columns = {}
        self._freelist_leaves = []
179
180
    @property
181
    def ptrmap(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
182
        return self._ptrmap
183
184
    @property
185
    def header(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
186
        return self._header
187
188
    @property
189
    def pages(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
190
        return self._pages
191
192
    @property
193
    def tables(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
194
        return self._tables
195
196
    @property
197
    def freelist_leaves(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
198
        return self._freelist_leaves
199
200
    @property
201
    def table_columns(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
202
        return self._table_columns
203
204
    def page_bytes(self, page_idx):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
205
        try:
206
            return self._page_cache[page_idx]
207
        except KeyError:
208
            raise ValueError("No cache for page %d", page_idx)
209
210
    def map_table_page(self, page_idx, table):
        """Record that the page at *page_idx* belongs to *table*.

        Args:
            page_idx: int index of the page.
            table: the Table instance owning the page.
        """
        assert isinstance(page_idx, int)
        assert isinstance(table, Table)
        self._page_tables[page_idx] = table
214
215
    def get_page_table(self, page_idx):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
216
        assert isinstance(page_idx, int)
217
        try:
218
            return self._page_tables[page_idx]
219
        except KeyError:
220
            return None
221
222
    def __repr__(self):
223
        return '<SQLite DB, page count: {} | page size: {}>'.format(
224
            self.header.size_in_pages,
225
            self.header.page_size
226
        )
227
228
    def parse_header(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
229
        header_bytes = None
230
        file_size = None
231
        with open(self._path, 'br') as sqlite:
232
            header_bytes = sqlite.read(100)
233
            file_size = os.fstat(sqlite.fileno())[stat.ST_SIZE]
234
235
        if not header_bytes:
236
            raise ValueError("Couldn't read SQLite header")
237
        assert isinstance(header_bytes, bytes)
238
        # This DB header is always big-endian
239
        fields = SQLite_header(*struct.unpack(
240
            r'>16sHBBBBBBIIIIIIIIIIII20xII',
241
            header_bytes[:100]
242
        ))
243
        assert fields.page_size in constants.VALID_PAGE_SIZES
244
        db_size = fields.page_size * fields.size_in_pages
245
        assert db_size <= file_size
246
        assert (fields.page_size > 0) and \
247
            (fields.file_change_counter == fields.version_valid)
248
249
        if file_size < 1073741824:
250
            _LOGGER.debug("No lock-byte page in this file!")
251
252
        if fields.first_freelist_trunk > 0:
253
            self._page_types[fields.first_freelist_trunk] = \
254
                constants.FREELIST_TRUNK_PAGE
255
        _LOGGER.debug(fields)
256
        return fields
257
258
    def build_page_cache(self):
        """Read every page into memory and create a generic Page for each.

        The raw bytes end up in ``self._page_cache`` and the Page objects in
        ``self._pages``, both indexed by 1-based page number.
        """
        page_size = self._header.page_size
        page_count = self._header.size_in_pages

        # SQLite numbers pages from 1 (page 1 holds the header), so slot 0
        # is a dummy that keeps list positions equal to page numbers
        cached = [None]
        with open(self._path, 'br') as db_file:
            for zero_based_idx in range(page_count):
                db_file.seek(zero_based_idx * page_size, os.SEEK_SET)
                cached.append(db_file.read(page_size))
        self._page_cache = cached

        # These generic objects are only placeholders, to be replaced with
        # more specialised ones as parsing progresses
        for page_idx in range(1, len(cached)):
            self._pages[page_idx] = Page(page_idx, self)
273
274
    def populate_freelist_pages(self):
        """Walk the freelist trunk chain and register trunk and leaf pages.

        Every trunk page becomes a FreelistTrunkPage and every leaf it lists
        becomes a FreelistLeafPage in ``self._pages``. Leaf indices are also
        appended to ``self._freelist_leaves`` and typed in
        ``self._page_types``.

        Raises:
            ValueError: if the trunk chain loops back onto a trunk page that
                was already parsed.
            AssertionError: if the number of pages found differs from the
                freelist page count stored in the header.
        """
        if self._header.first_freelist_trunk == 0:
            _LOGGER.debug("This database has no freelist trunk page")
            return

        _LOGGER.info("Parsing freelist pages")
        parsed_trunks = 0
        parsed_leaves = 0
        # Trunk pages already visited, so that a corrupt chain pointing back
        # at itself cannot keep this loop running forever
        seen_trunks = set()
        freelist_trunk_idx = self._header.first_freelist_trunk

        while freelist_trunk_idx != 0:
            if freelist_trunk_idx in seen_trunks:
                raise ValueError(
                    "Freelist trunk chain loops back to page {}".format(
                        freelist_trunk_idx
                    )
                )
            seen_trunks.add(freelist_trunk_idx)

            _LOGGER.debug(
                "Parsing freelist trunk page %d",
                freelist_trunk_idx
            )
            trunk_bytes = bytes(self.pages[freelist_trunk_idx])

            # The trunk starts with two big-endian 32-bit integers: the next
            # trunk page index and the number of leaf pointers that follow
            next_freelist_trunk_page_idx, num_leaf_pages = struct.unpack_from(
                r'>II',
                trunk_bytes
            )

            # The leaf pointers really start right after those two integers,
            # i.e. 8 bytes into the page
            leaf_indices = struct.unpack_from(
                r'>{count}I'.format(count=num_leaf_pages),
                trunk_bytes,
                8
            )

            leaves_in_trunk = []
            for page_idx in leaf_indices:
                # Let's prepare a specialised object for this freelist leaf
                # page
                leaf_page = FreelistLeafPage(
                    page_idx, self, freelist_trunk_idx
                )
                leaves_in_trunk.append(leaf_page)
                self._freelist_leaves.append(page_idx)
                self._pages[page_idx] = leaf_page

                self._page_types[page_idx] = constants.FREELIST_LEAF_PAGE

            trunk_page = FreelistTrunkPage(
                freelist_trunk_idx,
                self,
                leaves_in_trunk
            )
            self._pages[freelist_trunk_idx] = trunk_page
            # We've parsed this trunk page
            parsed_trunks += 1
            # ...And every leaf in it
            parsed_leaves += num_leaf_pages

            freelist_trunk_idx = next_freelist_trunk_page_idx

        assert (parsed_trunks + parsed_leaves) == self._header.freelist_pages
        _LOGGER.info(
            "Freelist summary: %d trunk pages, %d leaf pages",
            parsed_trunks,
            parsed_leaves
        )
338
339
    def populate_overflow_pages(self):
        """Replace every page typed as overflow with an OverflowPage object.

        Only ``self._page_types`` is consulted, so the page types must have
        been filled in beforehand (e.g. from the pointer map); cells in table
        leaf pages are not inspected here.
        """
        _LOGGER.info("Parsing overflow pages")
        overflow_count = 0
        for page_idx in sorted(self._page_types):
            if self._page_types[page_idx] in constants.OVERFLOW_PAGE_TYPES:
                self.pages[page_idx] = OverflowPage(page_idx, self)
                overflow_count += 1

        _LOGGER.info("Overflow summary: %d pages", overflow_count)
356
357
    def populate_ptrmap_pages(self):
        """Parse the pointer-map pages and derive page types from them.

        When the header's ``largest_btree_page`` is 0 the database has no
        pointer map, and every page is typed as UNKNOWN_PAGE instead.
        Otherwise each pointer-map page, starting at page 2, is decoded into
        SQLite_ptrmap_info entries which are stored in ``self._ptrmap``, and
        the page itself is replaced with a PtrmapPage in ``self._pages``.

        Raises:
            AssertionError: if an entry has an unexpected type or a pointer
                value inconsistent with its type.
        """
        if self._header.largest_btree_page == 0:
            # We don't have ptrmap pages in this DB. That sucks.
            _LOGGER.warning("%r does not have ptrmap pages!", self)
            # Pages are numbered from 1 to size_in_pages inclusive, so the
            # upper bound has to be size_in_pages + 1 to reach the last one
            for page_idx in range(1, self._header.size_in_pages + 1):
                self._page_types[page_idx] = constants.UNKNOWN_PAGE
            return

        _LOGGER.info("Parsing ptrmap pages")

        usable_size = self._header.page_size - self._header.reserved_length
        # Each entry is 5 bytes long: a type byte and a 32-bit page pointer
        num_ptrmap_entries_in_page = usable_size // 5
        ptrmap_page_indices = []

        # Entry types that must carry a non-zero pointer and whose type is
        # recorded as is
        child_page_types = (
            constants.FIRST_OFLOW_PAGE,
            constants.NON_FIRST_OFLOW_PAGE,
            constants.BTREE_NONROOT_PAGE,
        )

        ptrmap_page_idx = 2
        while ptrmap_page_idx <= self._header.size_in_pages:
            page_bytes = self._page_cache[ptrmap_page_idx]
            ptrmap_page_indices.append(ptrmap_page_idx)
            self._page_types[ptrmap_page_idx] = constants.PTRMAP_PAGE
            page_ptrmap_entries = {}

            ptrmap_bytes = page_bytes[:5 * num_ptrmap_entries_in_page]
            for entry_idx in range(num_ptrmap_entries_in_page):
                # Entry N of a ptrmap page describes the Nth page after it
                ptr_page_idx = ptrmap_page_idx + entry_idx + 1
                page_type, page_ptr = struct.unpack(
                    r'>BI',
                    ptrmap_bytes[5*entry_idx:5*(entry_idx+1)]
                )
                if page_type == 0:
                    # A zero type marks the end of the entries in use
                    break

                ptrmap_entry = SQLite_ptrmap_info(
                    ptr_page_idx, page_type, page_ptr
                )
                assert ptrmap_entry.page_type in constants.PTRMAP_PAGE_TYPES
                if page_type == constants.BTREE_ROOT_PAGE:
                    assert page_ptr == 0
                    self._page_types[ptr_page_idx] = page_type

                elif page_type == constants.FREELIST_PAGE:
                    # Freelist pages are assumed to be known already
                    assert self._page_types[ptr_page_idx] in \
                        constants.FREELIST_PAGE_TYPES
                    assert page_ptr == 0

                elif page_type in child_page_types:
                    assert page_ptr != 0
                    self._page_types[ptr_page_idx] = page_type

                self._ptrmap[ptr_page_idx] = ptrmap_entry
                page_ptrmap_entries[ptr_page_idx] = ptrmap_entry

            page = PtrmapPage(ptrmap_page_idx, self, page_ptrmap_entries)
            self._pages[ptrmap_page_idx] = page
            _LOGGER.debug("%r", page)
            # Skip over the pages this ptrmap page describes to reach the
            # next ptrmap page
            ptrmap_page_idx += num_ptrmap_entries_in_page + 1

        _LOGGER.info(
            "Ptrmap summary: %d pages, %r",
            len(ptrmap_page_indices), ptrmap_page_indices
        )
428
429
    def populate_btree_pages(self):
        """Try to parse every page not known to be a non-btree page.

        Pages that parse successfully have their cells parsed, their type
        recorded in ``self._page_types`` and their object stored in
        ``self._pages``. Pages rejected by BTreePage are logged and skipped.
        """
        # TODO Should this use table information instead of scanning all pages?
        for page_idx in range(1, self._header.size_in_pages + 1):
            # Pages without a recorded type are still attempted
            if page_idx in self._page_types and \
                    self._page_types[page_idx] in \
                    constants.NON_BTREE_PAGE_TYPES:
                continue

            try:
                page_obj = BTreePage(page_idx, self)
            except ValueError:
                # This page isn't a valid btree page. This can happen if we
                # don't have a ptrmap to guide us
                _LOGGER.warning(
                    "Page %d (%s) is not a btree page",
                    page_idx,
                    # The type may be unrecorded; indexing would raise
                    # KeyError from inside this handler
                    self._page_types.get(page_idx)
                )
                continue

            page_obj.parse_cells()
            self._page_types[page_idx] = page_obj.page_type
            self._pages[page_idx] = page_obj
458
459
    def _parse_master_leaf_page(self, page):
        """Register the tables described by one leaf page of sqlite_master.

        Every cell of ``page`` is turned into a ``SQLite_master_record``.
        Records whose type is not ``'table'`` are ignored. For table records,
        the root page is stored in ``self._table_roots`` and, when the
        ``CREATE TABLE`` statement has the expected single-line shape, the
        column names parsed out of it are stored in ``self._table_columns``.

        :param page: a page object exposing a ``cells`` mapping whose values
            are ``(rowid, Record)`` pairs.
        """
        # Compiled once per call rather than once per record
        columns_re = re.compile(r'^CREATE TABLE (\S+) \((.*)\)$')
        csl_between_parens_re = re.compile(r'\([^)]+\)')
        # First keyword of each kind of table-level constraint; such
        # fragments of the column list do not define a column.
        constraint_keywords = frozenset(
            ('CONSTRAINT', 'PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN')
        )

        for cell_idx in page.cells:
            _, master_record = page.cells[cell_idx]
            assert isinstance(master_record, Record)
            fields = [
                master_record.fields[idx].value for idx in master_record.fields
            ]
            master_record = SQLite_master_record(*fields)
            if master_record.type != 'table':
                continue

            self._table_roots[master_record.name] = \
                self.pages[master_record.rootpage]

            # This record describes a table in the schema, which means it
            # includes a SQL statement that defines the table's columns
            # We need to parse the field names out of that statement
            assert master_record.sql.startswith('CREATE TABLE')
            match = columns_re.match(master_record.sql)
            if not match:
                continue

            assert match.group(1) == master_record.name
            # Drop parenthesised groups (type arguments, constraint column
            # lists) so that the commas they contain don't split a
            # definition in two.
            expunged = csl_between_parens_re.sub('', match.group(2))

            columns = []
            for statement in expunged.split(','):
                tokens = statement.split()
                # Skip empty fragments (which used to raise IndexError) and
                # table-level constraints. Comparing the whole first token
                # keeps columns such as "PRIMARY_ID" that merely begin with
                # a constraint keyword.
                if not tokens or tokens[0].upper() in constraint_keywords:
                    continue
                columns.append(tokens[0])

            _LOGGER.info(
                "Columns for table \"%s\": %r",
                master_record.name, columns
            )
            self._table_columns[master_record.name] = columns
500
501
    def map_tables(self):
        """Build a ``Table`` object for every table found in sqlite_master.

        Page 1 is used as the root of the ``sqlite_master`` table. Each of its
        leaf pages is parsed to discover table root pages and column names,
        then one ``Table`` is instantiated per discovered root (including
        ``sqlite_master`` itself) and stored in ``self._tables``. A table
        whose instantiation fails is logged and left out instead of aborting
        the whole mapping.
        """
        first_page = self.pages[1]
        assert isinstance(first_page, BTreePage)

        master_table = Table('sqlite_master', self, first_page)
        self._table_columns.update(constants.SQLITE_TABLE_COLUMNS)

        for master_leaf in master_table.leaves:
            self._parse_master_leaf_page(master_leaf)

        assert all(
            isinstance(root, BTreePage) for root in self._table_roots.values()
        )
        assert all(
            root.parent is None for root in self._table_roots.values()
        )

        self.map_table_page(1, master_table)
        self._table_roots['sqlite_master'] = self.pages[1]

        for table_name, rootpage in self._table_roots.items():
            try:
                table_obj = Table(table_name, self, rootpage)
            except Exception as ex:  # pylint:disable=W0703
                # Deliberately best-effort: one broken table must not stop
                # the others from being mapped. (The interactive debugger
                # breakpoint that used to sit here has been removed.)
                _LOGGER.warning(
                    "Caught %r while instantiating table object for \"%s\"",
                    ex, table_name
                )
            else:
                self._tables[table_name] = table_obj
532
533
    def reparent_orphaned_table_leaf_pages(self):
        """Attach table leaf pages that belong to no table to an ancestor's.

        For every "Table Leaf" B-tree page without a table, the chain of
        ``parent`` pages is followed to its top and the table of that topmost
        page is taken as the owner. The leaf is added to that table and
        mapped to it. Leaves for which no owning table can be found are
        reported and left untouched.
        """
        reparented_pages = []
        for page in self.pages.values():
            if not isinstance(page, BTreePage):
                continue
            if page.page_type != "Table Leaf":
                continue
            if page.table:
                # Already attached to a table
                continue

            # Walk up to the topmost ancestor; root_table ends up being the
            # table (if any) associated with that ancestor.
            parent = page
            root_table = None
            while parent:
                root_table = parent.table
                parent = parent.parent

            if root_table is None:
                # No ancestor is mapped to a table: there is nothing to
                # reparent to, and dereferencing root_table would raise
                # AttributeError.
                _LOGGER.warning(
                    "Could not find a table to reparent %r to", page
                )
                continue

            _LOGGER.debug(
                "Reparenting %r to table \"%s\"",
                page, root_table.name
            )
            root_table.add_leaf(page)
            self.map_table_page(page.idx, root_table)
            reparented_pages.append(page)

        if reparented_pages:
            _LOGGER.info(
                "Reparented %d pages: %r",
                len(reparented_pages), [p.idx for p in reparented_pages]
            )
562
563
    def grep(self, needle):
        """Search the raw bytes of every page for a regular expression.

        :param needle: pattern string; it is UTF-8 encoded and compiled as a
            bytes regular expression.

        Every page from 1 to ``self.header.size_in_pages`` is searched and the
        offsets of all matches within a page are logged. A warning is logged
        only when no page contains a match.
        """
        match_found = False
        needle_re = re.compile(needle.encode('utf-8'))
        for page_idx in range(1, self.header.size_in_pages + 1):
            page = self.pages[page_idx]
            page_offsets = [
                match.start() for match in needle_re.finditer(bytes(page))
            ]
            if not page_offsets:
                continue

            # Remember the hit; previously this flag was never set, so the
            # "not found" warning was emitted even after successful matches.
            match_found = True
            _LOGGER.info(
                "Found search term in page %r @ offset(s) %s",
                page, ', '.join(str(offset) for offset in page_offsets)
            )

        if not match_found:
            _LOGGER.warning(
                "Search term not found",
            )
583
584
585
class Table(object):
    """A database table, described by its root B-tree page and leaf pages."""

    def __init__(self, name, db, rootpage):
        """Create the table and collect the pages reachable from its root.

        :param name: table name, also used to look up the column names in
            ``db.table_columns``.
        :param db: the database object the table belongs to.
        :param rootpage: ``BTreePage`` at the root of the table's B-tree.
        """
        self._name = name
        self._db = db
        assert isinstance(rootpage, BTreePage)
        self._root = rootpage
        self._leaves = []
        try:
            self._columns = self._db.table_columns[self.name]
        except KeyError:
            # Column names are not known for this table
            self._columns = None

        # We want this to be a list of leaf-type pages, sorted in the order of
        # their smallest rowid
        self._populate_pages()

    @property
    def name(self):
        """Name of the table."""
        return self._name

    def add_leaf(self, leaf_page):
        """Append ``leaf_page`` to the table's list of leaf pages."""
        self._leaves.append(leaf_page)

    @property
    def columns(self):
        """List of column names, or None when they are not known."""
        return self._columns

    def __repr__(self):
        return "<SQLite table \"{}\", root: {}, leaves: {}>".format(
            self.name, self._root.idx, len(self._leaves)
        )

    def _populate_pages(self):
        """Collect the table's pages and map each of them to this table.

        Starting from the root page (plus the page referenced by the root's
        ``right_most_page_idx``, when set), the cells of every
        "Table Interior" page are followed. Leaf pages end up in
        ``self._leaves`` and every visited page is registered through
        ``self._db.map_table_page``.
        """
        _LOGGER.info("Page %d is root for %s", self._root.idx, self.name)
        table_pages = [self._root]

        if self._root.btree_header.right_most_page_idx is not None:
            rightmost_idx = self._root.btree_header.right_most_page_idx
            rightmost_page = self._db.pages[rightmost_idx]
            if rightmost_page is not self._root:
                _LOGGER.info(
                    "Page %d is rightmost for %s",
                    rightmost_idx, self.name
                )
                table_pages.append(rightmost_page)

        page_queue = list(table_pages)
        while page_queue:
            table_page = page_queue.pop(0)
            # table_pages is initialised with the table's rootpage, which
            # may be a leaf page for a very small table
            if table_page.page_type != 'Table Interior':
                self._leaves.append(table_page)
                continue

            for cell_idx in table_page.cells:
                page_ptr, max_row_in_page = table_page.cells[cell_idx]

                page = self._db.pages[page_ptr]
                _LOGGER.debug("B-Tree cell: (%r, %d)", page, max_row_in_page)
                table_pages.append(page)
                if page.page_type == 'Table Interior':
                    page_queue.append(page)
                elif page.page_type == 'Table Leaf':
                    self._leaves.append(page)

        assert all(p.page_type == 'Table Leaf' for p in self._leaves)
        for page in table_pages:
            self._db.map_table_page(page.idx, self)

    @property
    def leaves(self):
        """Iterate over the table's leaf pages."""
        yield from self._leaves

    def recover_records(self):
        """Recover and print records from the freeblocks of each leaf page.

        Leaf pages without freeblocks are skipped.
        """
        for page in self.leaves:
            assert isinstance(page, BTreePage)
            if not page.freeblocks:
                continue

            _LOGGER.info("%r", page)
            page.recover_freeblock_records()
            page.print_recovered_records()

    def _record_to_row(self, record):
        """Map the field values of ``record`` onto the table's columns."""
        values_iter = (
            record.fields[idx].value for idx in record.fields
        )
        return dict(zip(self._columns, values_iter))

    def csv_dump(self, out_dir):
        """Write the table's live and recovered records to a CSV file.

        :param out_dir: directory in which ``<table name>.csv`` is created.
        :raises ValueError: if the output file already exists.
        """
        csv_path = os.path.join(out_dir, self.name + '.csv')
        if os.path.exists(csv_path):
            raise ValueError("Output file {} exists!".format(csv_path))

        _LOGGER.info("Dumping table \"%s\" to CSV", self.name)
        with tempfile.TemporaryFile('w+', newline='') as csv_temp:
            writer = csv.DictWriter(csv_temp, fieldnames=self._columns)
            writer.writeheader()

            for leaf_page in self.leaves:
                for cell_idx in leaf_page.cells:
                    rowid, record = leaf_page.cells[cell_idx]
                    _LOGGER.debug('Record %d: %r', rowid, record.header)
                    fields_iter = (
                        repr(record.fields[idx]) for idx in record.fields
                    )
                    _LOGGER.debug(', '.join(fields_iter))

                    writer.writerow(self._record_to_row(record))

                if not leaf_page.recovered_records:
                    continue

                # Recovered records are in an unordered set because their rowid
                # has been lost, making sorting impossible
                for record in leaf_page.recovered_records:
                    writer.writerow(self._record_to_row(record))

            if csv_temp.tell() > 0:
                csv_temp.seek(0)
                # newline='' is required here too: the temporary file holds
                # the csv writer's own line terminators verbatim, and letting
                # the text layer translate them again would corrupt the line
                # endings on platforms whose os.linesep is not "\n".
                with open(csv_path, 'w', newline='') as csv_file:
                    csv_file.write(csv_temp.read())

    def build_insert_SQL(self, record):
        """Build an INSERT statement and its named parameters for ``record``.

        :param record: object whose ``fields`` mapping is keyed by column
            position and whose values expose a ``value`` attribute.
        :returns: ``(statement, parameters)`` where ``statement`` uses one
            ``:column`` placeholder per table column and ``parameters`` maps
            every column name to its value. Columns missing from the record
            and fields whose value is the string ``'NULL'`` map to ``None``.
        """
        # NOTE(review): the table and column names are interpolated into the
        # statement unquoted; only the values are passed as parameters.
        insert_statement = 'INSERT INTO {} VALUES ({})'.format(
            self.name,
            ', '.join(':' + col_name for col_name in self._columns),
        )
        value_kwargs = {}
        for col_idx, col_name in enumerate(self._columns):
            try:
                value = record.fields[col_idx].value
            except KeyError:
                # The record has fewer fields than the table has columns
                value = None
            value_kwargs[col_name] = None if value == 'NULL' else value

        return insert_statement, value_kwargs
729
730
731
class Page(object):
    """A single page of the database, identified by its index."""

    def __init__(self, page_idx, db):
        """Remember the page index and owner and fetch the page's bytes.

        :param page_idx: index of the page within the database.
        :param db: database object; must provide ``page_bytes(idx)``.
        """
        self._page_idx = page_idx
        self._db = db
        self._bytes = db.page_bytes(self.idx)

    @property
    def idx(self):
        """Index of this page."""
        return self._page_idx

    @property
    def usable_size(self):
        """Page size minus the reserved length, both from the db header."""
        db_header = self._db.header
        return db_header.page_size - db_header.reserved_length

    def __bytes__(self):
        return self._bytes

    @property
    def parent(self):
        """Page that the ptrmap entry of this page points to, if any.

        Returns None when the page has no ptrmap entry or when the entry's
        page pointer is 0.
        """
        try:
            parent_idx = self._db.ptrmap[self.idx].page_ptr
        except KeyError:
            return None

        if parent_idx == 0:
            return None
        return self._db.pages[parent_idx]

    def __repr__(self):
        return "<SQLite Page {0}>".format(self.idx)
762
763
764
class FreelistTrunkPage(Page):
    """A freelist trunk page together with the leaves it was given."""

    # XXX Maybe it would make sense to expect a Page instance as constructor
    # argument?
    def __init__(self, page_idx, db, leaves):
        """Initialise the page and keep a reference to ``leaves``."""
        super().__init__(page_idx, db)
        self._leaves = leaves

    def __repr__(self):
        template = "<SQLite Freelist Trunk Page {0}: {1} leaves>"
        return template.format(self.idx, len(self._leaves))
775
776
777
class FreelistLeafPage(Page):
    """A freelist leaf page that keeps a reference to its trunk page."""

    # XXX Maybe it would make sense to expect a Page instance as constructor
    # argument?
    def __init__(self, page_idx, db, trunk_idx):
        """Initialise the page and look the trunk page up in ``db.pages``."""
        super().__init__(page_idx, db)
        trunk_page = self._db.pages[trunk_idx]
        self._trunk = trunk_page

    def __repr__(self):
        template = "<SQLite Freelist Leaf Page {0}. Trunk: {1}>"
        return template.format(self.idx, self._trunk.idx)
788
789
790
class PtrmapPage(Page):
    """A pointer map page holding the ptrmap entries passed to it."""

    # XXX Maybe it would make sense to expect a Page instance as constructor
    # argument?
    def __init__(self, page_idx, db, ptr_array):
        """Initialise the page and store ``ptr_array`` as its pointers."""
        super().__init__(page_idx, db)
        self._pointers = ptr_array

    @property
    def pointers(self):
        """The ptrmap entries this page was created with."""
        return self._pointers

    def __repr__(self):
        template = "<SQLite Ptrmap Page {0}. {1} pointers>"
        return template.format(self.idx, len(self.pointers))
805
806
807
class OverflowPage(Page):
    """An overflow page; its ``parent`` is the page it continues."""

    # XXX Maybe it would make sense to expect a Page instance as constructor
    # argument?
    def __init__(self, page_idx, db):
        """Initialise the page, then run the (currently empty) parser."""
        super().__init__(page_idx, db)
        self._parse()

    def _parse(self):
        """Placeholder for overflow page parsing; does nothing yet."""
        # TODO We should have parsing here for the next page index in the
        # overflow chain
        pass

    def __repr__(self):
        template = "<SQLite Overflow Page {0}. Continuation of {1}>"
        return template.format(self.idx, self.parent.idx)
823
824
825
class BTreePage(Page):
0 ignored issues
show
Coding Style introduced by
This class should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
826
    btree_page_types = {
827
        0x02:   "Index Interior",
828
        0x05:   "Table Interior",
829
        0x0A:   "Index Leaf",
830
        0x0D:   "Table Leaf",
831
    }
832
833
    def __init__(self, page_idx, db):
        """Parse the B-tree page header and the cell pointer array.

        :param page_idx: index of the page within the database.
        :param db: database object the page belongs to.
        :raises ValueError: if the page type byte of the header is not one
            of ``BTreePage.btree_page_types``, i.e. the page is not a B-tree
            page.
        """
        # XXX We don't know a page's type until we've had a look at the header.
        # Or do we?
        super().__init__(page_idx, db)
        self._header_size = 8
        page_header_bytes = self._get_btree_page_header()
        self._btree_header = SQLite_btree_page_header(
            # Set the right-most page index to None in the 1st pass
            *struct.unpack(r'>BHHHB', page_header_bytes), None
        )
        self._cell_ptr_array = []
        self._freeblocks = IndexDict()
        self._cells = IndexDict()
        self._recovered_records = set()
        self._overflow_threshold = self.usable_size - 35

        if self._btree_header.page_type not in BTreePage.btree_page_types:
            # Callers treat this as "not a B-tree page" and carry on, so it
            # must not drop into an interactive debugger.
            raise ValueError(
                "Page {} has unknown B-tree page type {}".format(
                    self.idx, self._btree_header.page_type
                )
            )

        # We have a twelve-byte header, need to read it again
        if self._btree_header.page_type in (0x02, 0x05):
            self._header_size = 12
            page_header_bytes = self._get_btree_page_header()
            self._btree_header = SQLite_btree_page_header(*struct.unpack(
                r'>BHHHBI', page_header_bytes
            ))

        # Page 1 (and page 2, but that's the 1st ptrmap page) does not have a
        # ptrmap entry.
        # The first ptrmap page will contain back pointer information for pages
        # 3 through J+2, inclusive.
        if (
                self._db.ptrmap and
                self.idx >= 3 and
                self.idx not in self._db.ptrmap
        ):
            _LOGGER.warning(
                "BTree page %d doesn't have ptrmap entry!", self.idx
            )

        if self._btree_header.num_cells > 0:
            cell_ptr_bytes = self._get_btree_ptr_array(
                self._btree_header.num_cells
            )
            self._cell_ptr_array = struct.unpack(
                r'>{count}H'.format(count=self._btree_header.num_cells),
                cell_ptr_bytes
            )
            smallest_cell_offset = min(self._cell_ptr_array)
            if self._btree_header.cell_content_offset != smallest_cell_offset:
                _LOGGER.warning(
                    (
                        "Inconsistent cell ptr array in page %d! Cell content "
                        "starts at offset %d, but min cell pointer is %d"
                    ),
                    self.idx,
                    self._btree_header.cell_content_offset,
                    smallest_cell_offset
                )
890
891
    @property
892
    def btree_header(self):
        """Parsed B-tree page header of this page."""
        header = self._btree_header
        return header
894
895
    @property
896
    def page_type(self):
        """Human-readable name of this page's B-tree page type.

        :raises KeyError: if the type code found in the page header is not
            listed in ``btree_page_types``; a warning is logged first.
        """
        type_code = self._btree_header.page_type
        try:
            return self.btree_page_types[type_code]
        except KeyError:
            # Report and propagate; this used to stop in an interactive
            # debugger before logging.
            _LOGGER.warning(
                "Unknown B-Tree page type: %d", type_code
            )
            raise
905
906
    @property
907
    def freeblocks(self):
        """Mapping of the freeblocks recorded for this page."""
        blocks = self._freeblocks
        return blocks
909
910
    @property
911
    def cells(self):
        """Mapping of the cells recorded for this page."""
        parsed_cells = self._cells
        return parsed_cells
913
914
    def __repr__(self):
915
        # TODO Include table in repr, where available
916
        return "<SQLite B-Tree Page {0} ({1}) {2} cells>".format(
917
            self.idx, self.page_type, len(self._cell_ptr_array)
918
        )
919
920
    @property
    def table(self):
        """Return whatever ``self._db.get_page_table`` yields for this page.

        The lookup is delegated to the database object on every access, using
        ``self.idx`` as the page index.
        """
        page_index = self.idx
        return self._db.get_page_table(page_index)
923
924
    def _get_btree_page_header(self):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
925
        header_offset = 0
926
        if self.idx == 1:
927
            header_offset += 100
928
        return bytes(self)[header_offset:self._header_size + header_offset]
929
930
    def _get_btree_ptr_array(self, num_cells):
0 ignored issues
show
Coding Style introduced by
This method should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
931
        array_offset = self._header_size
932
        if self.idx == 1:
933
            array_offset += 100
934
        return bytes(self)[array_offset:2 * num_cells + array_offset]
935
936
    def parse_cells(self):
        """Parse the page's cells according to its type, then its freeblocks.

        Page type 0x05 is handed to ``parse_table_interior_cells`` and 0x0D to
        ``parse_table_leaf_cells``. Any other type has no cell parser here.
        ``parse_freeblocks`` runs in every case.
        """
        kind = self.btree_header.page_type
        if kind == 0x05:
            self.parse_table_interior_cells()
        elif kind == 0x0D:
            self.parse_table_leaf_cells()
        self.parse_freeblocks()
942
943
    def parse_table_interior_cells(self):
        """Parse every cell of a table interior page into ``self._cells``.

        Each offset in ``self._cell_ptr_array`` is read as a 4-byte big-endian
        unsigned integer (the left pointer) followed by a varint (the integer
        key). ``self._cells[cell_idx]`` is set to ``(left_ptr, integer_key)``.

        Raises:
            AssertionError: if the page type is not 0x05.
            struct.error: if fewer than 4 bytes are available at a cell
                offset.
        """
        if self.btree_header.page_type != 0x05:
            # Raised explicitly rather than through ``assert False`` so the
            # guard is not compiled away when Python runs with -O.
            raise AssertionError(
                "Page {} is not a table interior page".format(self.idx)
            )

        _LOGGER.debug("Parsing cells in table interior cell %d", self.idx)
        # Call bytes(self) once rather than twice per cell
        page_bytes = bytes(self)
        for cell_idx, offset in enumerate(self._cell_ptr_array):
            _LOGGER.debug("Parsing cell %d @ offset %d", cell_idx, offset)
            left_ptr, = struct.unpack(r'>I', page_bytes[offset:offset + 4])

            key_offset = offset + 4
            # A varint is encoded on at most 9 bytes
            integer_key = Varint(page_bytes[key_offset:key_offset + 9])
            self._cells[cell_idx] = (left_ptr, int(integer_key))
956
957
    def parse_table_leaf_cells(self):
        """Parse every cell of a table leaf page into ``self._cells``.

        For each offset in ``self._cell_ptr_array`` the cell is read as a
        payload-size varint, a rowid varint and the payload itself. When the
        payload size exceeds ``self._overflow_threshold`` the remainder is
        gathered from the chain of overflow pages fetched through
        ``self._db.page_bytes``. The payload is wrapped in a ``Record`` and
        ``self._cells[cell_idx]`` is set to ``(rowid, record)``.

        A cell whose 4-byte overflow pointer is missing or truncated is
        skipped.

        Raises:
            AssertionError: if the page type is not 0x0D, or if the bytes
                gathered for a cell do not add up to its declared payload
                size.
            TypeError: re-raised, after a warning is logged, when building
                the ``Record`` raises it.
        """
        if self.btree_header.page_type != 0x0d:
            # Raised explicitly rather than through ``assert False`` so the
            # guard is not compiled away when Python runs with -O.
            raise AssertionError(
                "Page {} is not a table leaf page".format(self.idx)
            )

        _LOGGER.debug("Parsing cells in table leaf cell %d", self.idx)
        # Call bytes(self) once rather than several times per cell
        page_bytes = bytes(self)
        for cell_idx, cell_offset in enumerate(self._cell_ptr_array):
            _LOGGER.debug("Parsing cell %d @ offset %d", cell_idx, cell_offset)

            # This is the total size of the payload, which may include overflow
            offset = cell_offset
            payload_length_varint = Varint(page_bytes[offset:offset + 9])
            total_payload_size = int(payload_length_varint)

            # Let X be U-35. If the payload size P is less than or equal to X
            # then the entire payload is stored on the b-tree leaf page. Let M
            # be ((U-12)*32/255)-23 and let K be M+((P-M)%(U-4)). If P is
            # greater than X then the number of bytes stored on the table
            # b-tree leaf page is K if K is less or equal to X or M otherwise.
            # The number of bytes stored on the leaf page is never less than M.
            overflow = total_payload_size > self._overflow_threshold
            if overflow:
                # Floor division keeps M an exact integer (no float rounding)
                min_local = (self.usable_size - 12) * 32 // 255 - 23
                k_local = min_local + (
                    (total_payload_size - min_local) % (self.usable_size - 4)
                )
                if k_local <= self._overflow_threshold:
                    cell_payload_size = k_local
                else:
                    cell_payload_size = min_local
            else:
                cell_payload_size = total_payload_size

            offset += len(payload_length_varint)

            integer_key = Varint(page_bytes[offset:offset + 9])
            offset += len(integer_key)

            overflow_bytes = bytes()
            if overflow:
                first_oflow_page_bytes = page_bytes[
                    offset + cell_payload_size:offset + cell_payload_size + 4
                ]
                # A missing or truncated pointer cannot be followed
                if len(first_oflow_page_bytes) < 4:
                    continue

                first_oflow_idx, = struct.unpack(
                    r'>I', first_oflow_page_bytes
                )

                # Bytes of payload that still have to come from overflow
                # pages. This shrinks as pages are consumed, so the last page
                # of the chain contributes only what is left, not its full
                # content.
                remaining = total_payload_size - cell_payload_size
                chunks = []
                next_oflow_idx = first_oflow_idx
                while next_oflow_idx != 0 and remaining > 0:
                    oflow_page_bytes = self._db.page_bytes(next_oflow_idx)

                    # The first 4 bytes of an overflow page hold the index of
                    # the next page in the chain, the rest is payload
                    len_overflow = min(len(oflow_page_bytes) - 4, remaining)
                    chunks.append(oflow_page_bytes[4:4 + len_overflow])
                    remaining -= len_overflow

                    next_oflow_idx, = struct.unpack(
                        r'>I', oflow_page_bytes[:4]
                    )
                overflow_bytes = b''.join(chunks)

            try:
                cell_data = page_bytes[offset:offset + cell_payload_size]
                if overflow_bytes:
                    cell_data += overflow_bytes

                # All payload bytes should be accounted for
                if len(cell_data) != total_payload_size:
                    raise AssertionError(
                        "Cell {}: gathered {} payload bytes, expected "
                        "{}".format(
                            cell_idx, len(cell_data), total_payload_size
                        )
                    )

                record_obj = Record(cell_data)
                _LOGGER.debug("Created record: %r", record_obj)

            except TypeError as ex:
                _LOGGER.warning(
                    "Caught %r while instantiating record %d",
                    ex, int(integer_key)
                )
                raise

            self._cells[cell_idx] = (int(integer_key), record_obj)
1043
1044
    def parse_freeblocks(self):
        """Walk the page's freeblock chain and store each block's content.

        Starting from ``self.btree_header.first_freeblock_offset``, every
        freeblock's 4-byte header is decoded and the bytes that follow it are
        stored in ``self._freeblocks`` under the block's offset in the page.
        The walk ends at a next-offset of zero, or when the chain stops
        moving forward.

        Raises:
            struct.error: if fewer than 4 header bytes are available at a
                freeblock offset.
        """
        # The first 2 bytes of a freeblock are a big-endian integer which is
        # the offset in the b-tree page of the next freeblock in the chain, or
        # zero if the freeblock is the last on the chain. The third and fourth
        # bytes of each freeblock form a big-endian integer which is the size
        # of the freeblock in bytes, including the 4-byte header. Freeblocks
        # are always connected in order of increasing offset. The second field
        # of the b-tree page header is the offset of the first freeblock, or
        # zero if there are no freeblocks on the page. In a well-formed b-tree
        # page, there will always be at least one cell before the first
        # freeblock.
        #
        # TODO But what about deleted records that exceeded the overflow
        # threshold in the past?
        page_bytes = bytes(self)
        block_offset = self.btree_header.first_freeblock_offset
        while block_offset != 0:
            freeblock_header = page_bytes[block_offset:block_offset + 4]
            # Freeblock_size includes the 4-byte header
            next_freeblock_offset, freeblock_size = struct.unpack(
                r'>HH',
                freeblock_header
            )
            # The block spans [block_offset, block_offset + freeblock_size),
            # so only the 4 header bytes at its start are left out.
            block_end = block_offset + freeblock_size
            self._freeblocks[block_offset] = page_bytes[
                block_offset + 4:block_end
            ]

            # Offsets must strictly increase along the chain. Anything else
            # is corruption and would otherwise loop forever.
            if 0 < next_freeblock_offset <= block_offset:
                _LOGGER.warning(
                    "Freeblock chain does not advance (%d -> %d), stopping",
                    block_offset, next_freeblock_offset
                )
                break
            block_offset = next_freeblock_offset
1071
1072
    def print_cells(self):
        """Log each parsed cell, then have its record print its fields.

        Every entry of ``self.cells`` is unpacked as ``(rowid, record)``. The
        record's ``print_fields`` is called with this page's table.
        """
        for cell_idx in self.cells.keys():
            entry = self.cells[cell_idx]
            rowid, record = entry
            _LOGGER.info(
                "Cell %d, rowid: %d, record: %r",
                cell_idx,
                rowid,
                record,
            )
            record.print_fields(table=self.table)
1080
1081
    def recover_freeblock_records(self):
        """Try to rebuild deleted records from this page's freeblocks.

        Nothing is attempted unless the page has a table and ``heuristics``
        has an entry for that table's name. For every non-empty freeblock,
        the heuristic yields candidate header offsets. Each candidate is
        parsed as a ``Record``, candidates raising ``MalformedRecord`` are
        dropped, and the rest are truncated to header plus field lengths and
        added to ``self._recovered_records``.
        """
        # If we're lucky (i.e. if no overwriting has taken place), we should be
        # able to find whole record headers in freeblocks.
        # We need to start from the end of the freeblock and work our way back
        # to the start. That means we don't know where a cell header will
        # start, but I suppose we can take a guess
        table = self.table
        if not table or table.name not in heuristics:
            return

        _LOGGER.info("Attempting to recover records from freeblocks")
        for position, block_offset in enumerate(self._freeblocks):
            block = self._freeblocks[block_offset]
            if not block:
                continue
            _LOGGER.debug(
                "Freeblock %d/%d in page, offset %d, %d bytes",
                position + 1,
                len(self._freeblocks),
                block_offset,
                len(block)
            )

            byte_tally = 0
            record_tally = 0

            # TODO Maybe we need to guess the record header lengths rather than
            # try and read them from the freeblocks
            for candidate in heuristics[table.name](block):
                _LOGGER.debug(
                    (
                        "Trying potential record header start at "
                        "freeblock offset %d/%d"
                    ),
                    candidate, len(block)
                )
                _LOGGER.debug("%r", block)
                try:
                    # We don't know how to handle overflow in deleted records,
                    # so the candidate is capped at the overflow threshold
                    # before it is handed to Record
                    window_end = candidate + self._overflow_threshold
                    recovered = Record(block[candidate:window_end])
                except MalformedRecord:
                    # Not a well-formed record, move on to the next candidate
                    continue

                payload_length = sum(
                    len(one_field) for one_field in recovered.fields.values()
                )
                recovered.truncate(payload_length + len(recovered.header))
                self._recovered_records.add(recovered)

                byte_tally += len(bytes(recovered))
                record_tally += 1

            _LOGGER.info(
                (
                    "Recovered %d record(s): %d bytes out of %d "
                    "freeblock bytes @ offset %d"
                ),
                record_tally,
                byte_tally,
                len(block),
                block_offset,
            )
1150
1151
    @property
    def recovered_records(self):
        """Return the collection stored in ``self._recovered_records``.

        ``recover_freeblock_records`` adds the records it manages to rebuild
        to this collection.
        """
        return self._recovered_records
1154
1155
    def print_recovered_records(self):
        """Log every recovered record, its header, and print its fields.

        Does nothing when ``self._recovered_records`` is empty or otherwise
        falsy.
        """
        recovered = self._recovered_records
        if recovered:
            for record_obj in recovered:
                _LOGGER.info("Recovered record: %r", record_obj)
                _LOGGER.info("Recovered record header: %s", record_obj.header)
                record_obj.print_fields(table=self.table)
1163
1164
1165
class Record(object):
    """A record parsed from raw bytes: a header followed by field data.

    The header starts with a varint giving the header length (that varint
    included) and continues with one serial-type varint per column. The
    field contents follow the header, in column order. Parsing happens on
    construction and again after ``truncate``.
    """

    # Serial type -> (content length in bytes, description) for the
    # fixed-size serial types. Types >= 12 have a length derived from the
    # type itself and are handled in _parse.
    column_types = {
        0: (0, "NULL"),
        1: (1, "8-bit twos-complement integer"),
        2: (2, "big-endian 16-bit twos-complement integer"),
        3: (3, "big-endian 24-bit twos-complement integer"),
        4: (4, "big-endian 32-bit twos-complement integer"),
        5: (6, "big-endian 48-bit twos-complement integer"),
        6: (8, "big-endian 64-bit twos-complement integer"),
        7: (8, "Floating point"),
        8: (0, "Integer 0"),
        9: (0, "Integer 1"),
    }

    def __init__(self, record_bytes):
        """Store ``record_bytes`` and parse them straight away.

        Raises:
            MalformedRecord: if the bytes do not hold a complete record.
            ValueError: if the header holds an unknown serial type.
        """
        self._bytes = record_bytes
        self._header_bytes = None
        self._fields = IndexDict()
        self._parse()

    def __bytes__(self):
        return self._bytes

    @property
    def header(self):
        """Return the raw header bytes, length varint included."""
        return self._header_bytes

    @property
    def fields(self):
        """Return the mapping of column index to ``Field`` object."""
        return self._fields

    def truncate(self, new_length):
        """Keep only the first ``new_length`` bytes and parse them again."""
        self._bytes = self._bytes[:new_length]
        self._parse()

    def _parse(self):
        """Decode the header and build one ``Field`` per serial type.

        Raises:
            MalformedRecord: if there are fewer bytes than the header length
                announces, if a field cannot be decoded, or if the fields
                run past the end of the available bytes.
            ValueError: if a serial type is neither in ``column_types`` nor
                a valid text/blob type (>= 12).
        """
        record_bytes = bytes(self)

        # A varint is encoded on *at most* 9 bytes
        header_length_varint = Varint(record_bytes[:9])
        header_length = int(header_length_varint)

        if len(record_bytes) < header_length:
            raise MalformedRecord(
                "Not enough bytes to fully read the record header!"
            )

        # The serial types start right after the header length varint
        header_offset = len(header_length_varint)
        self._header_bytes = record_bytes[:header_length]

        col_idx = 0
        # Field contents start right after the header
        field_offset = header_length
        while header_offset < header_length:
            serial_type_varint = Varint(
                record_bytes[header_offset:9 + header_offset]
            )
            serial_type = int(serial_type_varint)

            try:
                col_length, _ = self.column_types[serial_type]
            except KeyError:
                # Odd types >= 13 and even types >= 12 encode their own
                # content length
                if serial_type >= 13 and serial_type % 2 == 1:
                    col_length = (serial_type - 13) // 2
                elif serial_type >= 12 and serial_type % 2 == 0:
                    col_length = (serial_type - 12) // 2
                else:
                    raise ValueError(
                        "Unknown serial type {}".format(serial_type)
                    )

            try:
                field_obj = Field(
                    col_idx,
                    serial_type,
                    record_bytes[field_offset:field_offset + col_length]
                )
            except MalformedField as ex:
                _LOGGER.warning(
                    "Caught %r while instantiating field %d (%d)",
                    ex, col_idx, serial_type
                )
                # Keep the field error attached as the cause
                raise MalformedRecord from ex
            except Exception as ex:
                # Unexpected failure: log it and let it propagate. No
                # debugger breakpoint, callers may not be interactive.
                _LOGGER.warning(
                    "Caught %r while instantiating field %d (%d)",
                    ex, col_idx, serial_type
                )
                raise

            self._fields[col_idx] = field_obj
            col_idx += 1
            field_offset += col_length
            header_offset += len(serial_type_varint)

            if field_offset > len(record_bytes):
                raise MalformedRecord

    def print_fields(self, table=None):
        """Log every field of the record at INFO level.

        Args:
            table: optional table object. When it is truthy and its
                ``columns`` is not None, each value is labelled with
                ``table.columns[index]``. Otherwise the field index, length
                and serial type are logged instead.
        """
        for field_idx in self._fields:
            field_obj = self._fields[field_idx]
            if not table or table.columns is None:
                _LOGGER.info(
                    "\tField %d (%d bytes), type %d: %s",
                    field_obj.index,
                    len(field_obj),
                    field_obj.serial_type,
                    field_obj.value
                )
            else:
                _LOGGER.info(
                    "\t%s: %s",
                    table.columns[field_obj.index],
                    field_obj.value
                )

    def __repr__(self):
        return '<Record {} fields, {} bytes, header: {} bytes>'.format(
            len(self._fields), len(bytes(self)), len(self.header)
        )
1296
1297
1298
class MalformedField(Exception):
    """Raised when the bytes given to a ``Field`` cannot be decoded."""
1300
1301
1302
class MalformedRecord(Exception):
    """Raised when bytes cannot be parsed into a complete ``Record``."""
1304
1305
1306
class Field(object):
    """One column value of a record, decoded according to its serial type."""

    # Serial type -> width in bytes of the twos-complement integer it holds
    _integer_widths = {1: 1, 2: 2, 3: 3, 4: 4, 5: 6, 6: 8}

    def __init__(self, idx, serial_type, serial_bytes):
        """Store the raw bytes and decode them straight away.

        Args:
            idx: index of the column within its record.
            serial_type: serial type code read from the record header.
            serial_bytes: raw content bytes of the field.

        Raises:
            MalformedField: if the bytes do not fit the serial type.
        """
        self._index = idx
        self._type = serial_type
        self._bytes = serial_bytes
        self._value = None
        self._parse()

    def _check_length(self, expected_length):
        """Raise ``MalformedField`` unless exactly ``expected_length`` bytes
        are held."""
        if len(self) != expected_length:
            raise MalformedField(
                "Serial type {} needs {} bytes, got {}".format(
                    self._type, expected_length, len(self)
                )
            )

    # TODO Raise a specific exception when bad bytes are encountered for the
    # fields and then use this to weed out bad freeblock records
    def _parse(self):
        """Decode ``self._bytes`` into ``self._value``.

        Serial types that match none of the branches below leave the value
        as None.

        Raises:
            MalformedField: if a fixed-size type does not hold the expected
                number of bytes, or if text is not valid UTF-8.
        """
        if self._type == 0:
            self._value = 'NULL'
        # Integer types
        elif self._type in self._integer_widths:
            width = self._integer_widths[self._type]
            self._check_length(width)
            self._value = decode_twos_complement(
                bytes(self)[0:width], 8 * width
            )
        elif self._type == 7:
            # Same length guard as the integer types. Without it, short data
            # surfaces as struct.error instead of MalformedField.
            self._check_length(8)
            self._value = struct.unpack(r'>d', bytes(self)[0:8])[0]
        elif self._type == 8:
            self._value = 0
        elif self._type == 9:
            self._value = 1
        elif self._type >= 13 and self._type % 2 == 1:
            # Odd types from 13 upwards hold text
            try:
                self._value = bytes(self).decode('utf-8')
            except UnicodeDecodeError as ex:
                raise MalformedField from ex
        elif self._type >= 12 and self._type % 2 == 0:
            # Even types from 12 upwards hold raw bytes
            self._value = bytes(self)

    def __bytes__(self):
        return self._bytes

    def __repr__(self):
        return "<Field {}: {} ({} bytes)>".format(
            self._index, self._value, len(bytes(self))
        )

    def __len__(self):
        return len(bytes(self))

    @property
    def index(self):
        """Return the index of the column within its record."""
        return self._index

    @property
    def value(self):
        """Return the decoded value (None if the type was not decoded)."""
        return self._value

    @property
    def serial_type(self):
        """Return the serial type code the field was built with."""
        return self._type
1380
1381
1382
class Varint(object):
    """Decoder for a SQLite variable-length integer ("varint").

    A varint occupies between one and nine bytes and encodes a 64-bit two's
    complement integer, most significant bits first. In each of the first
    eight bytes the high bit is a continuation flag and the low seven bits
    are payload. A ninth byte, when present, contributes all eight of its
    bits and always ends the varint.

    ``varint_bytes`` may be longer than the varint itself; decoding stops at
    the varint's last byte. ``len()`` gives the number of bytes consumed and
    ``int()`` the decoded signed value. If the input ends before a
    terminating byte is seen, the bits read so far are used.
    """

    def __init__(self, varint_bytes):
        self._bytes = varint_bytes
        self._len = 0
        self._value = 0

        unsigned = 0
        for position, byte in enumerate(self._bytes):
            self._len += 1
            if position == 8:
                # The ninth byte has no continuation flag: all eight bits
                # are payload and the varint ends here unconditionally.
                unsigned = (unsigned << 8) | byte
                break
            unsigned = (unsigned << 7) | (byte & 0x7F)
            if not byte & 0x80:
                # Continuation flag clear: this was the last byte.
                break

        # Reinterpret the accumulated (at most 64-bit) pattern as a signed
        # two's complement integer. Done inline so that the full 64-bit
        # range is handled without an intermediate fixed-width byte string.
        if unsigned & (1 << 63):
            unsigned -= 1 << 64
        self._value = unsigned

    def __int__(self):
        return self._value

    def __len__(self):
        return self._len

    def __repr__(self):
        return "<Varint {} ({} bytes)>".format(int(self), len(self))
1413
1414
1415
def decode_twos_complement(encoded, bit_length):
    """Decode big-endian bytes as a two's complement signed integer.

    ``encoded`` is read as an unsigned big-endian integer; the bit at
    position ``bit_length - 1`` is then treated as the sign bit.
    ``bit_length`` must be a multiple of 8 (checked with an assertion).
    """
    assert bit_length % 8 == 0
    raw = int.from_bytes(encoded, byteorder='big')
    sign_bit = 2 ** (bit_length - 1)
    # The sign bit weighs -2**(bit_length - 1); every other bit keeps its
    # ordinary positive weight.
    magnitude = raw & ~sign_bit
    return magnitude - (raw & sign_bit)
1421
1422
1423
def gen_output_dir(db_path):
    """Return a not-yet-existing output directory path next to ``db_path``.

    The directory name is the database file name with every ``.`` replaced
    by ``_``. If that path already exists, the suffixes ``_1`` to ``_10``
    are tried in order. Nothing is created on disk.

    Raises SystemError when all eleven candidates already exist.
    """
    parent_dir, file_name = os.path.split(os.path.abspath(db_path))
    base_name = file_name.replace('.', '_')

    # The bare name comes first, followed by the numbered fallbacks.
    candidates = [base_name]
    candidates.extend(
        "{}_{}".format(base_name, number) for number in range(1, 11)
    )
    for candidate in candidates:
        out_dir = os.path.join(parent_dir, candidate)
        if not os.path.exists(out_dir):
            return out_dir

    raise SystemError(
        "Unreasonable number of output directories for {}".format(db_path)
    )
1440
1441
1442
def _load_db(sqlite_path):
    """Build and return a fully populated ``SQLite_DB`` for ``sqlite_path``.

    Loads the heuristics, instantiates the database object and then runs its
    population, table-mapping and orphan-reparenting steps in a fixed order.
    Finally asserts that every page is an instance of a ``Page`` subclass
    rather than of ``Page`` itself.
    """
    _LOGGER.info("Processing %s", sqlite_path)
    load_heuristics()

    database = SQLite_DB(sqlite_path)
    _LOGGER.info("Database: %r", database)

    database.populate_freelist_pages()
    database.populate_ptrmap_pages()
    database.populate_overflow_pages()

    # Should we aim to instantiate specialised b-tree objects here, or is the
    # use of generic btree page objects acceptable?
    database.populate_btree_pages()

    database.map_tables()

    # We need a first pass to process tables that are disconnected
    # from their table's root page
    database.reparent_orphaned_table_leaf_pages()

    # All pages should now be represented by specialised objects. The exact
    # type() comparison is deliberate: isinstance() would also accept the
    # subclasses we expect to find here.
    assert all(isinstance(page, Page) for page in database.pages.values())
    assert not any(type(page) is Page for page in database.pages.values())
    return database
1468
1469
1470
def dump_to_csv(args):
    """Handle the ``csv`` subcommand.

    Loads the database at ``args.sqlite_path``, creates the output directory
    (``args.output_dir`` or, when that is falsy, one picked by
    ``gen_output_dir``) and, for each table in name order, recovers its
    records and dumps it to CSV in that directory.

    Raises ValueError when the output directory already exists.
    """
    out_dir = args.output_dir
    if not out_dir:
        out_dir = gen_output_dir(args.sqlite_path)
    database = _load_db(args.sqlite_path)

    if os.path.exists(out_dir):
        raise ValueError("Output directory {} exists!".format(out_dir))
    os.mkdir(out_dir)

    for table_name in sorted(database.tables):
        table = database.tables[table_name]
        _LOGGER.info('Table "%s"', table)
        table.recover_records()
        table.csv_dump(out_dir)
1483
1484
1485
def _insert_recovered_records(cursor, table):
    """Insert every recovered record of ``table`` through ``cursor``.

    Returns a ``(successful, failed, constraint_violations)`` tuple of
    counters. Insert errors are counted (and, except for constraint
    violations, logged) rather than propagated, so that one bad record does
    not abort the whole recovery.
    """
    successful_inserts = 0
    failed_inserts = 0
    constraint_violations = 0

    for leaf_page in table.leaves:
        if not leaf_page.recovered_records:
            continue

        for record in leaf_page.recovered_records:
            insert_statement, values = table.build_insert_SQL(record)

            try:
                cursor.execute(insert_statement, values)
            except sqlite3.IntegrityError:
                # We gotta soldier on, there's not much we can do if a
                # constraint is violated by this insert
                constraint_violations += 1
            except (
                    sqlite3.ProgrammingError,
                    sqlite3.OperationalError,
                    sqlite3.InterfaceError,
            ) as insert_ex:
                _LOGGER.warning(
                    (
                        "Caught %r while executing INSERT statement "
                        "in \"%s\""
                    ),
                    insert_ex,
                    table
                )
                failed_inserts += 1
            else:
                successful_inserts += 1

    return successful_inserts, failed_inserts, constraint_violations


def undelete(args):
    """Handle the ``undelete`` subcommand.

    Loads the database at ``args.sqlite_path``, copies the file to
    ``args.output_path`` and inserts the records recovered from each table
    (in table-name order) into that copy. Per-table counts of successful
    inserts, failed inserts and constraint violations are logged.

    Raises ValueError when the output file already exists.
    """
    db_abspath = os.path.abspath(args.sqlite_path)
    db = _load_db(db_abspath)

    output_path = os.path.abspath(args.output_path)
    if os.path.exists(output_path):
        raise ValueError("Output file {} exists!".format(output_path))

    shutil.copyfile(db_abspath, output_path)

    # Using a sqlite3 connection as a context manager only commits or rolls
    # back the transaction; it does not close the connection. Close it
    # explicitly so the handle on the output file is always released.
    output_db_connection = sqlite3.connect(output_path)
    try:
        with output_db_connection:
            cursor = output_db_connection.cursor()
            for table_name in sorted(db.tables):
                table = db.tables[table_name]
                _LOGGER.info("Table \"%s\"", table)
                table.recover_records()

                (
                    successful_inserts,
                    failed_inserts,
                    constraint_violations,
                ) = _insert_recovered_records(cursor, table)

                if failed_inserts > 0:
                    _LOGGER.warning(
                        "%d failed INSERT statements in \"%s\"",
                        failed_inserts, table
                    )
                if constraint_violations > 0:
                    _LOGGER.warning(
                        "%d constraint violations statements in \"%s\"",
                        constraint_violations, table
                    )
                _LOGGER.info(
                    "%d successful INSERT statements in \"%s\"",
                    successful_inserts, table
                )
    finally:
        output_db_connection.close()
1548
1549
1550
def find_in_db(args):
    """Handle the ``grep`` subcommand.

    Loads the database at ``args.sqlite_path`` and calls its ``grep`` method
    with ``args.needle``.
    """
    database = _load_db(args.sqlite_path)
    database.grep(args.needle)
1553
1554
1555
# Maps each command-line subcommand name to the function implementing it.
# Every handler takes the parsed argparse namespace as its only argument.
subcmd_actions = {
    'csv': dump_to_csv,
    'grep': find_in_db,
    'undelete': undelete,
}
1560
1561
1562
def subcmd_dispatcher(arg_ns):
    """Run the handler registered for ``arg_ns.subcmd`` and return its result.

    The handler is looked up in ``subcmd_actions`` and called with ``arg_ns``
    itself; an unknown subcommand name raises KeyError.
    """
    handler = subcmd_actions[arg_ns.subcmd]
    return handler(arg_ns)
1564
1565
1566
def main(argv=None):
    """Command-line entry point: parse arguments and run the subcommand.

    ``argv`` is the list of command-line arguments to parse; when None (the
    default) argparse falls back to ``sys.argv[1:]``, so existing callers
    that invoke ``main()`` without arguments are unaffected.

    Builds a parser with the ``csv``, ``grep`` and ``undelete`` subcommands,
    raises the logger to DEBUG when ``-v``/``--verbose`` is given and
    dispatches to the selected subcommand. With no subcommand, the help text
    is printed instead.
    """
    # The verbosity flag lives in its own parser so that it can be shared,
    # through ``parents``, by the top-level parser and every subcommand.
    verbose_parser = argparse.ArgumentParser(add_help=False)
    verbose_parser.add_argument(
        '-v', '--verbose',
        action='count',
        help='Give *A LOT* more output.',
    )

    cli_parser = argparse.ArgumentParser(
        description=PROJECT_DESCRIPTION,
        parents=[verbose_parser],
    )

    subcmd_parsers = cli_parser.add_subparsers(
        title='Subcommands',
        description='%(prog)s implements the following subcommands:',
        dest='subcmd',
    )

    csv_parser = subcmd_parsers.add_parser(
        'csv',
        parents=[verbose_parser],
        help='Dumps visible and recovered records to CSV files',
        description=(
            'Recovers as many records as possible from the database passed as '
            'argument and outputs all visible and recovered records to CSV '
            'files in output_dir'
        ),
    )
    csv_parser.add_argument(
        'sqlite_path',
        help='sqlite3 file path'
    )
    csv_parser.add_argument(
        'output_dir',
        nargs='?',
        default=None,
        help='Output directory'
    )

    grep_parser = subcmd_parsers.add_parser(
        'grep',
        parents=[verbose_parser],
        help='Matches a string in one or more pages of the database',
        description=(
            'Matches the string passed as argument in one or more pages of '
            'the database.'
        ),
    )
    grep_parser.add_argument(
        'sqlite_path',
        help='sqlite3 file path'
    )
    grep_parser.add_argument(
        'needle',
        help='String to match in the database'
    )

    undelete_parser = subcmd_parsers.add_parser(
        'undelete',
        parents=[verbose_parser],
        help='Inserts recovered records into a copy of the database',
        description=(
            'Recovers as many records as possible from the database passed as '
            'argument and inserts all recovered records into a copy of '
            'the database.'
        ),
    )
    undelete_parser.add_argument(
        'sqlite_path',
        help='sqlite3 file path'
    )
    undelete_parser.add_argument(
        'output_path',
        help='Output database path'
    )

    cli_args = cli_parser.parse_args(argv)
    if cli_args.verbose:
        _LOGGER.setLevel(logging.DEBUG)

    if cli_args.subcmd:
        subcmd_dispatcher(cli_args)
    else:
        # No subcommand specified, print the usage and bail
        cli_parser.print_help()
1650