1
|
|
|
# frozen_string_literal: true |
2
|
|
|
|
3
|
1 |
|
module NoSE |
4
|
|
|
# A representation of materialized views over fields in an entity |
5
|
1 |
|
class Index |
6
|
1 |
|
attr_reader :hash_fields, :order_fields, :extra, :all_fields, :path, |
7
|
|
|
:entries, :entry_size, :size, :hash_count, :per_hash_count, |
8
|
|
|
:graph |
9
|
|
|
|
10
|
1 |
|
def initialize(hash_fields, order_fields, extra, graph, saved_key = nil) |
|
|
|
|
11
|
|
|
order_set = order_fields.to_set |
12
|
|
|
@hash_fields = hash_fields.to_set |
13
|
|
|
@order_fields = order_fields.delete_if { |e| hash_fields.include? e } |
14
|
|
|
@extra = extra.to_set.delete_if do |e| |
15
|
|
|
@hash_fields.include?(e) || order_set.include?(e) |
16
|
|
|
end |
17
|
|
|
@all_fields = Set.new.merge(@hash_fields).merge(order_set).merge(@extra) |
18
|
|
|
|
19
|
|
|
validate_hash_fields |
20
|
|
|
|
21
|
|
|
# Store whether this index is an identity |
22
|
|
|
@identity = @hash_fields == [ |
23
|
|
|
@hash_fields.first.parent.id_field |
24
|
|
|
].to_set && graph.nodes.size == 1 |
25
|
|
|
|
26
|
|
|
@graph = graph |
27
|
|
|
@path = graph.longest_path |
28
|
|
|
@path = nil unless @path.length == graph.size |
29
|
|
|
|
30
|
|
|
validate_graph |
31
|
|
|
|
32
|
|
|
build_hash saved_key |
33
|
|
|
end |
34
|
|
|
|
35
|
|
|
# Check if this index maps from the primary key to fields from one entity |
36
|
|
|
# @return [Boolean] |
37
|
1 |
|
def identity? |
38
|
|
|
@identity |
39
|
|
|
end |
40
|
|
|
|
41
|
|
|
# A simple key which uniquely identifies the index |
42
|
|
|
# @return [String] |
43
|
1 |
|
def key |
44
|
|
|
@key ||= "i#{Zlib.crc32 hash_str}" |
45
|
|
|
end |
46
|
|
|
|
47
|
|
|
# Look up a field in the index based on its ID |
48
|
|
|
# @return [Fields::Field] |
49
|
1 |
|
def [](field_id) |
50
|
|
|
@all_fields.find { |field| field.id == field_id } |
51
|
|
|
end |
52
|
|
|
|
53
|
|
|
# Check if this index is an ID graph |
54
|
|
|
# @return [Boolean] |
55
|
1 |
|
def id_graph? |
56
|
|
|
@hash_fields.all?(&:primary_key?) && @order_fields.all?(&:primary_key) |
57
|
|
|
end |
58
|
|
|
|
59
|
|
|
# Produce an index with the same fields but keyed by entities in the graph |
60
|
1 |
|
def to_id_graph |
61
|
|
|
return self if id_graph? |
62
|
|
|
|
63
|
|
|
all_ids = (@hash_fields.to_a + @order_fields + @extra.to_a) |
64
|
|
|
all_ids.map! { |f| f.parent.id_field }.uniq! |
65
|
|
|
|
66
|
|
|
hash_fields = [all_ids.first] |
67
|
|
|
order_fields = all_ids[1..-1] |
68
|
|
|
extra = @all_fields - hash_fields - order_fields |
69
|
|
|
|
70
|
|
|
Index.new hash_fields, order_fields, extra, @graph |
71
|
|
|
end |
72
|
|
|
|
73
|
|
|
# :nocov: |
74
|
1 |
|
def to_color |
75
|
|
|
fields = [@hash_fields, @order_fields, @extra].map do |field_group| |
76
|
|
|
'[' + field_group.map(&:inspect).join(', ') + ']' |
77
|
|
|
end |
78
|
|
|
|
79
|
|
|
"[magenta]#{key}[/] #{fields[0]} #{fields[1]} → #{fields[2]}" \ |
80
|
|
|
" [yellow]$#{size}[/]" \ |
81
|
|
|
" [magenta]#{@graph.inspect}[/]" |
82
|
|
|
end |
83
|
|
|
# :nocov: |
84
|
|
|
|
85
|
|
|
# Two indices are equal if they contain the same fields |
86
|
|
|
# @return [Boolean] |
87
|
1 |
|
def ==(other) |
88
|
|
|
hash == other.hash |
89
|
|
|
end |
90
|
1 |
|
alias eql? == |
91
|
|
|
|
92
|
|
|
# Hash based on the fields, their keys, and the graph |
93
|
|
|
# @return [String] |
94
|
1 |
|
def hash_str |
95
|
|
|
@hash_str ||= [ |
96
|
|
|
@hash_fields.map(&:id).sort, |
97
|
|
|
@order_fields.map(&:id), |
98
|
|
|
@extra.map(&:id).sort, |
99
|
|
|
@graph.unique_edges.map(&:canonical_params).sort! |
100
|
|
|
].to_s |
101
|
|
|
end |
102
|
|
|
|
103
|
1 |
|
def hash |
104
|
|
|
@hash ||= Zlib.crc32 hash_str |
105
|
|
|
end |
106
|
|
|
|
107
|
|
|
# Check if the index contains a given field |
108
|
|
|
# @return [Boolean] |
109
|
1 |
|
def contains_field?(field) |
110
|
|
|
@all_fields.include? field |
111
|
|
|
end |
112
|
|
|
|
113
|
1 |
|
private |
114
|
|
|
|
115
|
|
|
# Initialize the hash function and freeze ourselves |
116
|
|
|
# @return [void] |
117
|
1 |
|
def build_hash(saved_key) |
118
|
|
|
@key = saved_key |
119
|
|
|
|
120
|
|
|
hash |
121
|
|
|
key |
122
|
|
|
calculate_size |
123
|
|
|
freeze |
124
|
|
|
end |
125
|
|
|
|
126
|
|
|
# Check for valid hash fields in an index |
127
|
|
|
# @return [void] |
128
|
1 |
|
def validate_hash_fields |
129
|
|
|
fail InvalidIndexException, 'hash fields cannot be empty' \ |
130
|
|
|
if @hash_fields.empty? |
131
|
|
|
|
132
|
|
|
fail InvalidIndexException, 'hash fields can only involve one entity' \ |
133
|
|
|
if @hash_fields.map(&:parent).to_set.size > 1 |
134
|
|
|
end |
135
|
|
|
|
136
|
|
|
# Ensure an index is nonempty |
137
|
|
|
# @return [void] |
138
|
1 |
|
def validate_nonempty |
139
|
|
|
fail InvalidIndexException, 'must have fields other than hash fields' \ |
140
|
|
|
if @order_fields.empty? && @extra.empty? |
141
|
|
|
end |
142
|
|
|
|
143
|
|
|
# Ensure an index and its fields correspond to a valid graph |
144
|
|
|
# @return [void] |
145
|
1 |
|
def validate_graph |
146
|
|
|
validate_graph_entities |
147
|
|
|
validate_graph_keys |
148
|
|
|
end |
149
|
|
|
|
150
|
|
|
# Ensure the graph of the index is valid |
151
|
|
|
# @return [void] |
152
|
1 |
|
def validate_graph_entities |
153
|
|
|
entities = @all_fields.map(&:parent).to_set |
154
|
|
|
fail InvalidIndexException, 'graph entities do match index' \ |
155
|
|
|
unless entities == @graph.entities.to_set |
156
|
|
|
end |
157
|
|
|
|
158
|
|
|
# We must have the primary keys of the all entities in the graph |
159
|
|
|
# @return [void] |
160
|
1 |
|
def validate_graph_keys |
161
|
|
|
fail InvalidIndexException, 'missing graph entity keys' \ |
162
|
|
|
unless @graph.entities.map(&:id_field).to_set.subset? \ |
163
|
|
|
(@hash_fields + @order_fields).to_set |
164
|
|
|
end |
165
|
|
|
|
166
|
|
|
# Precalculate the size of the index |
167
|
|
|
# @return [void] |
168
|
1 |
|
def calculate_size |
169
|
|
|
@hash_count = @hash_fields.product_by(&:cardinality) |
170
|
|
|
|
171
|
|
|
# XXX This only works if foreign keys span all possible keys |
172
|
|
|
# Take the maximum possible count at each join and multiply |
173
|
|
|
@entries = @graph.entities.map(&:count).max |
174
|
|
|
@per_hash_count = (@entries * 1.0 / @hash_count) |
175
|
|
|
|
176
|
|
|
@entry_size = @all_fields.sum_by(&:size) |
177
|
|
|
@size = @entries * @entry_size |
178
|
|
|
end |
179
|
|
|
end |
180
|
|
|
|
181
|
|
|
# Thrown when something tries to create an invalid index |
182
|
1 |
|
class InvalidIndexException < StandardError |
183
|
|
|
end |
184
|
|
|
|
185
|
|
|
# Allow entities to create their own indices |
186
|
1 |
|
class Entity |
187
|
|
|
# Create a simple index which maps entity keys to other fields |
188
|
|
|
# @return [Index] |
189
|
1 |
|
def simple_index |
190
|
|
|
Index.new [id_field], [], fields.values - [id_field], |
191
|
|
|
QueryGraph::Graph.from_path([id_field]), name |
192
|
|
|
end |
193
|
|
|
end |
194
|
|
|
|
195
|
|
|
# Allow statements to materialize views |
196
|
1 |
|
class Statement |
197
|
|
|
# Construct an index which acts as a materialized view for a query |
198
|
|
|
# @return [Index] |
199
|
1 |
|
def materialize_view |
200
|
|
|
eq = materialized_view_eq join_order.first |
201
|
|
|
order_fields = materialized_view_order(join_order.first) - eq |
202
|
|
|
|
203
|
|
|
Index.new(eq, order_fields, |
204
|
|
|
all_fields - (@eq_fields + @order).to_set, @graph) |
205
|
|
|
end |
206
|
|
|
|
207
|
1 |
|
private |
208
|
|
|
|
209
|
|
|
# Get the fields used as parition keys for a materialized view |
210
|
|
|
# based over a given entity |
211
|
|
|
# @return [Array<Fields::Field>] |
212
|
1 |
|
def materialized_view_eq(hash_entity) |
213
|
|
|
eq = @eq_fields.select { |field| field.parent == hash_entity } |
214
|
|
|
eq = [join_order.last.id_field] if eq.empty? |
215
|
|
|
|
216
|
|
|
eq |
217
|
|
|
end |
218
|
|
|
|
219
|
|
|
# Get the ordered keys for a materialized view |
220
|
|
|
# @return [Array<Fields::Field>] |
221
|
1 |
|
def materialized_view_order(hash_entity) |
222
|
|
|
# Start the ordered fields with the equality predicates |
223
|
|
|
# on other entities, followed by all of the attributes |
224
|
|
|
# used in ordering, then the range field |
225
|
|
|
order_fields = @eq_fields.select do |field| |
226
|
|
|
field.parent != hash_entity |
227
|
|
|
end + @order |
228
|
|
|
if @range_field && [email protected]?(@range_field) |
229
|
|
|
order_fields << @range_field |
230
|
|
|
end |
231
|
|
|
|
232
|
|
|
# Ensure we include IDs of the final entity |
233
|
|
|
order_fields += join_order.map(&:id_field) |
234
|
|
|
|
235
|
|
|
order_fields.uniq |
236
|
|
|
end |
237
|
|
|
end |
238
|
|
|
end |
239
|
|
|
|