1 | # frozen_string_literal: true |
||
2 | |||
3 | 1 | module NoSE |
|
4 | # A representation of materialized views over fields in an entity |
||
5 | 1 | class Index |
|
6 | 1 | attr_reader :hash_fields, :order_fields, :extra, :all_fields, :path, |
|
7 | :entries, :entry_size, :size, :hash_count, :per_hash_count, |
||
8 | :graph |
||
9 | |||
10 | 1 | def initialize(hash_fields, order_fields, extra, graph, |
|
0 ignored issues
–
show
Coding Style
introduced
by
Loading history...
|
|||
11 | saved_key: nil) |
||
12 | 7560 | order_set = order_fields.to_set |
|
13 | 7560 | @hash_fields = hash_fields.to_set |
|
14 | 25179 | @order_fields = order_fields.delete_if { |e| hash_fields.include? e } |
|
15 | 7560 | @extra = extra.to_set.delete_if do |e| |
|
16 | 4292 | @hash_fields.include?(e) || order_set.include?(e) |
|
17 | end |
||
18 | 7560 | @all_fields = Set.new(@hash_fields).merge(order_set).merge(@extra) |
|
19 | |||
20 | 7560 | validate_hash_fields |
|
21 | |||
22 | # Store whether this index is an identity |
||
23 | 7558 | @identity = @hash_fields == [ |
|
24 | @hash_fields.first.parent.id_field |
||
25 | ].to_set && graph.nodes.size == 1 |
||
26 | |||
27 | 7558 | @graph = graph |
|
28 | 7558 | @path = graph.longest_path |
|
29 | 7558 | @path = nil unless @path.length == graph.size |
|
30 | |||
31 | 7558 | validate_graph |
|
32 | |||
33 | 7122 | build_hash saved_key |
|
34 | end |
||
35 | |||
36 | # Check if this index maps from the primary key to fields from one entity |
||
37 | # @return [Boolean] |
||
38 | 1 | def identity? |
|
39 | 2501 | @identity |
|
40 | end |
||
41 | |||
42 | # A simple key which uniquely identifies the index |
||
43 | # @return [String] |
||
44 | 1 | def key |
|
45 | 7364 | @key ||= "i#{Zlib.crc32 hash_str}" |
|
46 | end |
||
47 | |||
48 | # Look up a field in the index based on its ID |
||
49 | # @return [Fields::Field] |
||
50 | 1 | def [](field_id) |
|
51 | 6 | @all_fields.find { |field| field.id == field_id } |
|
52 | end |
||
53 | |||
54 | # Check if this index is an ID graph |
||
55 | # @return [Boolean] |
||
56 | 1 | def id_graph? |
|
57 | 45 | @hash_fields.all?(&:primary_key?) && @order_fields.all?(&:primary_key) |
|
58 | end |
||
59 | |||
60 | # Produce an index with the same fields but keyed by entities in the graph |
||
61 | 1 | def to_id_graph |
|
62 | 45 | return self if id_graph? |
|
63 | |||
64 | 17 | all_ids = (@hash_fields.to_a + @order_fields + @extra.to_a) |
|
65 | 83 | all_ids.map! { |f| f.parent.id_field }.uniq! |
|
66 | |||
67 | 17 | hash_fields = [all_ids.first] |
|
68 | 17 | order_fields = all_ids[1..-1] |
|
69 | 17 | extra = @all_fields - hash_fields - order_fields |
|
70 | |||
71 | 17 | Index.new hash_fields, order_fields, extra, @graph |
|
72 | end |
||
73 | |||
74 | # :nocov: |
||
75 | 1 | def to_color |
|
76 | 26 | fields = [@hash_fields, @order_fields, @extra].map do |field_group| |
|
77 | 78 | '[' + field_group.map(&:inspect).join(', ') + ']' |
|
78 | end |
||
79 | |||
80 | 26 | "[magenta]#{key}[/] #{fields[0]} #{fields[1]} → #{fields[2]}" \ |
|
81 | " [yellow]$#{size}[/]" \ |
||
82 | " [magenta]#{@graph.inspect}[/]" |
||
83 | end |
||
84 | # :nocov: |
||
85 | |||
86 | # Two indices are equal if they contain the same fields |
||
87 | # @return [Boolean] |
||
88 | 1 | def ==(other) |
|
89 | 7578 | hash == other.hash |
|
90 | end |
||
91 | 1 | alias eql? == |
|
92 | |||
93 | # Hash based on the fields, their keys, and the graph |
||
94 | # @return [String] |
||
95 | 1 | def hash_str |
|
96 | 14239 | @hash_str ||= [ |
|
97 | @hash_fields.map(&:id).sort!, |
||
98 | @order_fields.map(&:id), |
||
99 | @extra.map(&:id).sort!, |
||
100 | @graph.unique_edges.map(&:canonical_params).sort! |
||
101 | ].to_s.freeze |
||
102 | end |
||
103 | |||
104 | 1 | def hash |
|
105 | 35068 | @hash ||= Zlib.crc32 hash_str |
|
106 | end |
||
107 | |||
108 | # Check if the index contains a given field |
||
109 | # @return [Boolean] |
||
110 | 1 | def contains_field?(field) |
|
111 | 2 | @all_fields.include? field |
|
112 | end |
||
113 | |||
114 | 1 | private |
|
115 | |||
116 | # Initialize the hash function and freeze ourselves |
||
117 | # @return [void] |
||
118 | 1 | def build_hash(saved_key) |
|
119 | 7122 | @key = saved_key |
|
120 | |||
121 | 7122 | hash |
|
122 | 7122 | key |
|
123 | 7122 | calculate_size |
|
124 | 7122 | freeze |
|
125 | end |
||
126 | |||
127 | # Check for valid hash fields in an index |
||
128 | # @return [void] |
||
129 | 1 | def validate_hash_fields |
|
130 | fail InvalidIndexException, 'hash fields cannot be empty' \ |
||
131 | 7560 | if @hash_fields.empty? |
|
132 | |||
133 | 1 | fail InvalidIndexException, 'hash fields can only involve one entity' \ |
|
134 | 7559 | if @hash_fields.map(&:parent).to_set.size > 1 |
|
135 | end |
||
136 | |||
137 | # Ensure an index is nonempty |
||
138 | # @return [void] |
||
139 | 1 | def validate_nonempty |
|
140 | fail InvalidIndexException, 'must have fields other than hash fields' \ |
||
141 | if @order_fields.empty? && @extra.empty? |
||
142 | end |
||
143 | |||
144 | # Ensure an index and its fields correspond to a valid graph |
||
145 | # @return [void] |
||
146 | 1 | def validate_graph |
|
147 | 7558 | validate_graph_entities |
|
148 | 7122 | validate_graph_keys |
|
149 | end |
||
150 | |||
151 | # Ensure the graph of the index is valid |
||
152 | # @return [void] |
||
153 | 1 | def validate_graph_entities |
|
154 | 7558 | entities = @all_fields.map(&:parent).to_set |
|
155 | 436 | fail InvalidIndexException, 'graph entities do match index' \ |
|
156 | 7558 | unless entities == @graph.entities.to_set |
|
157 | end |
||
158 | |||
159 | # We must have the primary keys of the all entities in the graph |
||
160 | # @return [void] |
||
161 | 1 | def validate_graph_keys |
|
162 | fail InvalidIndexException, 'missing graph entity keys' \ |
||
163 | 7122 | unless @graph.entities.map(&:id_field).all? do |field| |
|
164 | 18064 | @hash_fields.include?(field) || @order_fields.include?(field) |
|
165 | end |
||
166 | end |
||
167 | |||
168 | # Precalculate the size of the index |
||
169 | # @return [void] |
||
170 | 1 | def calculate_size |
|
171 | 7122 | @hash_count = @hash_fields.product_by(&:cardinality) |
|
172 | |||
173 | # XXX This only works if foreign keys span all possible keys |
||
174 | # Take the maximum possible count at each join and multiply |
||
175 | 7122 | @entries = @graph.entities.map(&:count).max |
|
176 | 7122 | @per_hash_count = (@entries * 1.0 / @hash_count) |
|
177 | |||
178 | 7122 | @entry_size = @all_fields.sum_by(&:size) |
|
179 | 7122 | @size = @entries * @entry_size |
|
180 | end |
||
181 | end |
||
182 | |||
183 | # Thrown when something tries to create an invalid index |
||
184 | 1 | class InvalidIndexException < StandardError |
|
185 | end |
||
186 | |||
187 | # Allow entities to create their own indices |
||
188 | 1 | class Entity |
|
189 | # Create a simple index which maps entity keys to other fields |
||
190 | # @return [Index] |
||
191 | 1 | def simple_index |
|
192 | 36 | Index.new [id_field], [], fields.values - [id_field], |
|
193 | QueryGraph::Graph.from_path([id_field]), saved_key: name |
||
194 | end |
||
195 | end |
||
196 | |||
197 | # Allow statements to materialize views |
||
198 | 1 | class Statement |
|
199 | # Construct an index which acts as a materialized view for a query |
||
200 | # @return [Index] |
||
201 | 1 | def materialize_view |
|
202 | 152 | eq = materialized_view_eq join_order.first |
|
203 | 152 | order_fields = materialized_view_order(join_order.first) - eq |
|
204 | |||
205 | 152 | Index.new(eq, order_fields, |
|
206 | 152 | all_fields - (@eq_fields + @order).to_set, @graph) |
|
207 | end |
||
208 | |||
209 | 1 | private |
|
210 | |||
211 | # Get the fields used as parition keys for a materialized view |
||
212 | # based over a given entity |
||
213 | # @return [Array<Fields::Field>] |
||
214 | 1 | def materialized_view_eq(hash_entity) |
|
215 | 306 | eq = @eq_fields.select { |field| field.parent == hash_entity } |
|
216 | 152 | eq = [join_order.last.id_field] if eq.empty? |
|
217 | |||
218 | 152 | eq |
|
219 | end |
||
220 | |||
221 | # Get the ordered keys for a materialized view |
||
222 | # @return [Array<Fields::Field>] |
||
223 | 1 | def materialized_view_order(hash_entity) |
|
224 | # Start the ordered fields with the equality predicates |
||
225 | # on other entities, followed by all of the attributes |
||
226 | # used in ordering, then the range field |
||
227 | 152 | order_fields = @eq_fields.select do |field| |
|
0 ignored issues
–
show
|
|||
228 | 154 | field.parent != hash_entity |
|
229 | end + @order |
||
230 | 152 | if @range_field && [email protected]?(@range_field) |
|
0 ignored issues
–
show
|
|||
231 | 4 | order_fields << @range_field |
|
232 | end |
||
233 | |||
234 | # Ensure we include IDs of the final entity |
||
235 | 152 | order_fields += join_order.map(&:id_field) |
|
236 | |||
237 | 152 | order_fields.uniq |
|
238 | end |
||
239 | end |
||
240 | end |
||
241 |