# MIT License
#
# Copyright (c) 2017 Matt Boyer
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import re
import stat
import struct

from . import constants
from . import _LOGGER
from .record import Record
from .pages import (
    Page, OverflowPage, FreelistLeafPage, FreelistTrunkPage, BTreePage,
    PtrmapPage
)
from .table import Table
from .tuples import (
    SQLite_header, SQLite_ptrmap_info, SQLite_master_record, type_specs
)

# Maps table names to their column type signatures; populated as the
# sqlite_master schema is parsed
signatures = {}


class SQLite_DB(object):
    def __init__(self, path, heuristics_registry):
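        """
        Open the SQLite database at *path*, cache the raw bytes of every
        page and create placeholder Page objects for them. The
        *heuristics_registry* is handed to BTreePage when btree pages are
        parsed.
        """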
        self._path = path
        self._page_types = {}
        self._header = self.parse_header()
        self._registry = heuristics_registry

        self._page_cache = None
        # Actual page objects go here
        self._pages = {}
        self.build_page_cache()

        self._ptrmap = {}

        # TODO Do we need all of these?
        self._table_roots = {}
        self._page_tables = {}
        self._tables = {}
        self._table_columns = {}
        self._freelist_leaves = []
        self._freelist_btree_pages = []

    @property
    def ptrmap(self):
        return self._ptrmap

    @property
    def header(self):
        return self._header

    @property
    def pages(self):
        return self._pages

    @property
    def tables(self):
        return self._tables

    @property
    def freelist_leaves(self):
        return self._freelist_leaves

    @property
    def table_columns(self):
        return self._table_columns

    def page_bytes(self, page_idx):
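        """Return the raw bytes cached for the 1-based page index."""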
        try:
            return self._page_cache[page_idx]
        except (IndexError, TypeError) as ex:
            # The cache is a list (or None before build_page_cache runs),
            # so a bad index can't raise KeyError
            raise ValueError(f"No cache for page {page_idx}") from ex

    def map_table_page(self, page_idx, table):
        assert isinstance(page_idx, int)
        assert isinstance(table, Table)
        self._page_tables[page_idx] = table

    def get_page_table(self, page_idx):
        assert isinstance(page_idx, int)
        try:
            return self._page_tables[page_idx]
        except KeyError:
            return None

    def __repr__(self):
        return '<SQLite DB, page count: {} | page size: {}>'.format(
            self.header.size_in_pages,
            self.header.page_size
        )

    def parse_header(self):
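        """
        Read and validate the 100-byte header at the start of the file,
        returning it as a SQLite_header tuple. All multi-byte fields in
        the header are big-endian.
        """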
        header_bytes = None
        file_size = None
        with open(self._path, 'br') as sqlite:
            header_bytes = sqlite.read(100)
            file_size = os.fstat(sqlite.fileno())[stat.ST_SIZE]

        if not header_bytes:
            raise ValueError("Couldn't read SQLite header")
        assert isinstance(header_bytes, bytes)
        # This DB header is always big-endian
        fields = SQLite_header(*struct.unpack(
            r'>16sHBBBBBBIIIIIIIIIIII20xII',
            header_bytes[:100]
        ))
        assert fields.page_size in constants.VALID_PAGE_SIZES
        db_size = fields.page_size * fields.size_in_pages
        assert db_size <= file_size
        assert (fields.page_size > 0) and \
            (fields.file_change_counter == fields.version_valid)

        # The lock-byte page is the page spanning file offset 2**30
        # (1073741824); smaller files don't have one
        if file_size < 1073741824:
            _LOGGER.debug("No lock-byte page in this file!")

        if fields.first_freelist_trunk > 0:
            self._page_types[fields.first_freelist_trunk] = \
                constants.FREELIST_TRUNK_PAGE
        _LOGGER.debug(fields)
        return fields

    def build_page_cache(self):
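        """
        Read every page of the file into memory. The cache starts with a
        None placeholder so that list indices line up with SQLite's
        1-based page numbering.
        """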
        # The SQLite docs use a numbering convention for pages where the
        # first page (the one that has the header) is page 1, with the
        # first ptrmap page, if present, being page 2, etc.
        page_cache = [None, ]
        with open(self._path, 'br') as sqlite:
            for page_idx in range(self._header.size_in_pages):
                page_offset = page_idx * self._header.page_size
                sqlite.seek(page_offset, os.SEEK_SET)
                page_cache.append(sqlite.read(self._header.page_size))
        self._page_cache = page_cache
        for page_idx in range(1, len(self._page_cache)):
            # We want these to be temporary objects, to be replaced with
            # more specialised objects as parsing progresses
            self._pages[page_idx] = Page(page_idx, self)

    def populate_freelist_pages(self):
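        """
        Follow the chain of freelist trunk pages from the header. A trunk
        page begins with the page number of the next trunk (0 on the last
        trunk) and a leaf count, followed by that many 4-byte leaf page
        numbers.
        """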
        if 0 == self._header.first_freelist_trunk:
            _LOGGER.debug("This database has no freelist trunk page")
            return

        _LOGGER.info("Parsing freelist pages")
        parsed_trunks = 0
        parsed_leaves = 0
        freelist_trunk_idx = self._header.first_freelist_trunk

        while freelist_trunk_idx != 0:
            _LOGGER.debug(
                "Parsing freelist trunk page %d",
                freelist_trunk_idx
            )

            # Set _page_types value for this page
            self._page_types[freelist_trunk_idx] = \
                constants.FREELIST_TRUNK_PAGE

            trunk_bytes = bytes(self.pages[freelist_trunk_idx])

            next_freelist_trunk_page_idx, num_leaf_pages = struct.unpack(
                r'>II',
                trunk_bytes[:8]
            )

            # Now that we know how long the array of freelist page
            # pointers is, let's read it again
            trunk_array = struct.unpack(
                r'>{count}I'.format(count=2+num_leaf_pages),
                trunk_bytes[:(4*(2+num_leaf_pages))]
            )

            # We're skipping the first two entries as they are really the
            # next trunk index and the leaf count
            # TODO Fix that
            leaves_in_trunk = []
            for page_idx in trunk_array[2:]:
                # Let's prepare a specialised object for this freelist
                # leaf page
                leaf_page = FreelistLeafPage(
                    page_idx, self, freelist_trunk_idx
                )
                leaves_in_trunk.append(leaf_page)
                self._freelist_leaves.append(page_idx)
                self._pages[page_idx] = leaf_page

                self._page_types[page_idx] = constants.FREELIST_LEAF_PAGE

            trunk_page = FreelistTrunkPage(
                freelist_trunk_idx,
                self,
                leaves_in_trunk
            )
            self._pages[freelist_trunk_idx] = trunk_page
            # We've parsed this trunk page
            parsed_trunks += 1
            # ...And every leaf in it
            parsed_leaves += num_leaf_pages

            freelist_trunk_idx = next_freelist_trunk_page_idx

        assert (parsed_trunks + parsed_leaves) == self._header.freelist_pages
        _LOGGER.info(
            "Freelist summary: %d trunk pages, %d leaf pages",
            parsed_trunks,
            parsed_leaves
        )

    def populate_overflow_pages(self):
        # Knowledge of the overflow pages can come from the pointer map
        # (easy), or the parsing of individual cells in table leaf pages
        # (hard)
        #
        # For now, assume we already have a page type dict populated from
        # the ptrmap
        _LOGGER.info("Parsing overflow pages")
        overflow_count = 0
        for page_idx in sorted(self._page_types):
            page_type = self._page_types[page_idx]
            if page_type not in constants.OVERFLOW_PAGE_TYPES:
                continue
            overflow_page = OverflowPage(page_idx, self)
            self.pages[page_idx] = overflow_page
            overflow_count += 1

        _LOGGER.info("Overflow summary: %d pages", overflow_count)

    def populate_ptrmap_pages(self):
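        """
        Parse the pointer map, if this database has one. Ptrmap pages are
        only present when the header's largest root btree page field is
        non-zero (i.e. when vacuum support is enabled). The first ptrmap
        page is page 2, and each 5-byte entry holds a 1-byte page type
        and a 4-byte parent page number.
        """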
        if self._header.largest_btree_page == 0:
            # We don't have ptrmap pages in this DB. That sucks.
            _LOGGER.warning("%r does not have ptrmap pages!", self)
            for page_idx in range(1, self._header.size_in_pages):
                self._page_types[page_idx] = constants.UNKNOWN_PAGE
            return

        _LOGGER.info("Parsing ptrmap pages")

        usable_size = self._header.page_size - self._header.reserved_length
        num_ptrmap_entries_in_page = usable_size // 5
        ptrmap_page_indices = []

        ptrmap_page_idx = 2
        while ptrmap_page_idx <= self._header.size_in_pages:
            page_bytes = self._page_cache[ptrmap_page_idx]
            ptrmap_page_indices.append(ptrmap_page_idx)
            self._page_types[ptrmap_page_idx] = constants.PTRMAP_PAGE
            page_ptrmap_entries = {}

            ptrmap_bytes = page_bytes[:5 * num_ptrmap_entries_in_page]
            for entry_idx in range(num_ptrmap_entries_in_page):
                ptr_page_idx = ptrmap_page_idx + entry_idx + 1
                page_type, page_ptr = struct.unpack(
                    r'>BI',
                    ptrmap_bytes[5*entry_idx:5*(entry_idx+1)]
                )
                if page_type == 0:
                    break

                ptrmap_entry = SQLite_ptrmap_info(
                    ptr_page_idx, page_type, page_ptr
                )
                assert ptrmap_entry.page_type in constants.PTRMAP_PAGE_TYPES
                if page_type == constants.BTREE_ROOT_PAGE:
                    assert page_ptr == 0
                    self._page_types[ptr_page_idx] = page_type

                elif page_type == constants.FREELIST_PAGE:
                    # Freelist pages are assumed to be known already
                    assert self._page_types[ptr_page_idx] in \
                        constants.FREELIST_PAGE_TYPES
                    assert page_ptr == 0

                elif page_type == constants.FIRST_OFLOW_PAGE:
                    assert page_ptr != 0
                    self._page_types[ptr_page_idx] = page_type

                elif page_type == constants.NON_FIRST_OFLOW_PAGE:
                    assert page_ptr != 0
                    self._page_types[ptr_page_idx] = page_type

                elif page_type == constants.BTREE_NONROOT_PAGE:
                    assert page_ptr != 0
                    self._page_types[ptr_page_idx] = page_type

                # _LOGGER.debug("%r", ptrmap_entry)
                self._ptrmap[ptr_page_idx] = ptrmap_entry
                page_ptrmap_entries[ptr_page_idx] = ptrmap_entry

            page = PtrmapPage(ptrmap_page_idx, self, page_ptrmap_entries)
            self._pages[ptrmap_page_idx] = page
            _LOGGER.debug("%r", page)
            ptrmap_page_idx += num_ptrmap_entries_in_page + 1

        _LOGGER.info(
            "Ptrmap summary: %d pages, %r",
            len(ptrmap_page_indices), ptrmap_page_indices
        )

    def populate_btree_pages(self):
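        """
        Instantiate (and parse cells for) a BTreePage object for every
        page not already classified as a non-btree page.
        """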
        # TODO Should this use table information instead of scanning all
        # pages?
        page_idx = 1
        while page_idx <= self._header.size_in_pages:
            try:
                if self._page_types[page_idx] in \
                        constants.NON_BTREE_PAGE_TYPES:
                    page_idx += 1
                    continue
            except KeyError:
                pass

            try:
                # We need to pass in the singleton registry instance
                page_obj = BTreePage(page_idx, self, self._registry)
            except ValueError:
                # This page isn't a valid btree page. This can happen if
                # we don't have a ptrmap to guide us
                _LOGGER.warning(
                    "Page %d (%s) is not a btree page",
                    page_idx,
                    # The page type may still be unknown at this point
                    self._page_types.get(page_idx)
                )
                page_idx += 1
                continue

            page_obj.parse_cells()
            self._page_types[page_idx] = page_obj.page_type
            self._pages[page_idx] = page_obj
            page_idx += 1

    def _parse_master_leaf_page(self, page):
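        """
        Read the sqlite_master records on a leaf page, record each
        table's root page and derive its column names and type signature
        from the CREATE TABLE statement.
        """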
        for cell_idx in page.cells:
            _, master_record = page.cells[cell_idx]
            assert isinstance(master_record, Record)
            fields = [
                master_record.fields[idx].value for idx in master_record.fields
            ]
            master_record = SQLite_master_record(*fields)
            if 'table' != master_record.type:
                continue

            self._table_roots[master_record.name] = \
                self.pages[master_record.rootpage]

            # This record describes a table in the schema, which means it
            # includes a SQL statement that defines the table's columns.
            # We need to parse the field names out of that statement
            assert master_record.sql.startswith('CREATE TABLE')
            columns_re = re.compile(r'^CREATE TABLE (\S+) \((.*)\)$')
            match = columns_re.match(master_record.sql)
            if match:
                assert match.group(1) == master_record.name
                column_list = match.group(2)
                csl_between_parens_re = re.compile(r'\([^)]+\)')
                expunged = csl_between_parens_re.sub('', column_list)

                cols = [
                    statement.strip() for statement in expunged.split(',')
                ]
                cols = [
                    statement for statement in cols if not (
                        statement.startswith('PRIMARY') or
                        statement.startswith('UNIQUE')
                    )
                ]
                columns = [col.split()[0] for col in cols]
                signature = []

                # Some column definitions lack a type
                for col_def in cols:
                    def_tokens = col_def.split()
                    try:
                        col_type = def_tokens[1]
                    except IndexError:
                        signature.append(object)
                        continue

                    _LOGGER.debug(
                        "Column \"%s\" is defined as \"%s\"",
                        def_tokens[0], col_type
                    )
                    try:
                        signature.append(type_specs[col_type])
                    except KeyError:
                        _LOGGER.warning("No native type for \"%s\"", col_def)
                        signature.append(object)
                _LOGGER.info(
                    "Signature for table \"%s\": %r",
                    master_record.name, signature
                )
                signatures[master_record.name] = signature

                _LOGGER.info(
                    "Columns for table \"%s\": %r",
                    master_record.name, columns
                )
                self._table_columns[master_record.name] = columns

    def map_tables(self):
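        """
        Walk the sqlite_master table, whose root btree page is always
        page 1, and build a Table object for every table it describes.
        """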
        first_page = self.pages[1]
        assert isinstance(first_page, BTreePage)

        master_table = Table('sqlite_master', self, first_page, signatures)
        self._table_columns.update(constants.SQLITE_TABLE_COLUMNS)

        for master_leaf in master_table.leaves:
            self._parse_master_leaf_page(master_leaf)

        assert all(
            isinstance(root, BTreePage) for root in self._table_roots.values()
        )
        assert all(
            root.parent is None for root in self._table_roots.values()
        )

        self.map_table_page(1, master_table)
        self._table_roots['sqlite_master'] = self.pages[1]

        for table_name, rootpage in self._table_roots.items():
            try:
                table_obj = Table(table_name, self, rootpage, signatures)
            except Exception as ex:  # pylint:disable=W0703
                # pdb.set_trace()
                _LOGGER.warning(
                    "Caught %r while instantiating table object for \"%s\"",
                    ex, table_name
                )
            else:
                self._tables[table_name] = table_obj

    def reparent_orphaned_table_leaf_pages(self):
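        """
        Attempt to re-attach table leaf pages that have no parent chain
        (typically pages that ended up on the freelist) by matching their
        first record against the known table signatures.
        """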
        reparented_pages = []
        for page in self.pages.values():
            if not isinstance(page, BTreePage):
                continue
            if page.page_type != "Table Leaf":
                continue

            # Pages that already belong to a table don't need reparenting
            table = page.table
            if table:
                continue

            parent = page
            root_table = None
            while parent:
                root_table = parent.table
                parent = parent.parent
            if root_table is None:
                self._freelist_btree_pages.append(page)

            if root_table is None:
                if not page.cells:
                    continue

                first_record = page.cells[0][1]
                matches = []
                for table_name in signatures:
                    # All records within a given page are for the same
                    # table
                    if self.tables[table_name].check_signature(
                            first_record):
                        matches.append(self.tables[table_name])
                if not matches:
                    _LOGGER.error(
                        "Couldn't find a matching table for %r",
                        page
                    )
                    continue
                if len(matches) > 1:
                    _LOGGER.error(
                        "Multiple matching tables for %r: %r",
                        page, matches
                    )
                    continue
                root_table = matches[0]

            _LOGGER.debug(
                "Reparenting %r to table \"%s\"",
                page, root_table.name
            )
            root_table.add_leaf(page)
            self.map_table_page(page.idx, root_table)
            reparented_pages.append(page)

        if reparented_pages:
            _LOGGER.info(
                "Reparented %d pages: %r",
                len(reparented_pages), [p.idx for p in reparented_pages]
            )

    def grep(self, needle):
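        """
        Search the raw bytes of every page for the needle (compiled as a
        regular expression) and log the offsets of any matches.
        """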
        match_found = False
        page_idx = 1
        needle_re = re.compile(needle.encode('utf-8'))
        while page_idx <= self.header.size_in_pages:
            page = self.pages[page_idx]
            page_offsets = []
            for match in needle_re.finditer(bytes(page)):
                needle_offset = match.start()
                page_offsets.append(needle_offset)
            if page_offsets:
                match_found = True
                _LOGGER.info(
                    "Found search term in page %r @ offset(s) %s",
                    page, ', '.join(str(offset) for offset in page_offsets)
                )
            page_idx += 1
        if not match_found:
            _LOGGER.warning("Search term not found")
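
# A minimal usage sketch (illustrative only; the heuristics registry and the
# 'example.sqlite' path are hypothetical). The call order follows the
# assumptions encoded above: freelist pages must be classified before the
# ptrmap is parsed, and the ptrmap before overflow and btree pages:
#
#     db = SQLite_DB('example.sqlite', registry)
#     db.populate_freelist_pages()
#     db.populate_ptrmap_pages()
#     db.populate_overflow_pages()
#     db.populate_btree_pages()
#     db.map_tables()
#     db.reparent_orphaned_table_leaf_pages()
#     db.grep('needle')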