michaelmior /
NoSE
| 1 | # frozen_string_literal: true |
||
| 2 | |||
| 3 | 1 | module NoSE |
|
| 4 | # A representation of materialized views over fields in an entity |
||
| 5 | 1 | class Index |
|
| 6 | 1 | attr_reader :hash_fields, :order_fields, :extra, :all_fields, :path, |
|
| 7 | :entries, :entry_size, :size, :hash_count, :per_hash_count, |
||
| 8 | :graph |
||
| 9 | |||
| 10 | 1 | def initialize(hash_fields, order_fields, extra, graph, |
|
|
0 ignored issues
–
show
Coding Style
introduced
by
Loading history...
|
|||
| 11 | saved_key: nil) |
||
| 12 | 7560 | order_set = order_fields.to_set |
|
| 13 | 7560 | @hash_fields = hash_fields.to_set |
|
| 14 | 25179 | @order_fields = order_fields.delete_if { |e| hash_fields.include? e } |
|
| 15 | 7560 | @extra = extra.to_set.delete_if do |e| |
|
| 16 | 4292 | @hash_fields.include?(e) || order_set.include?(e) |
|
| 17 | end |
||
| 18 | 7560 | @all_fields = Set.new(@hash_fields).merge(order_set).merge(@extra) |
|
| 19 | |||
| 20 | 7560 | validate_hash_fields |
|
| 21 | |||
| 22 | # Store whether this index is an identity |
||
| 23 | 7558 | @identity = @hash_fields == [ |
|
| 24 | @hash_fields.first.parent.id_field |
||
| 25 | ].to_set && graph.nodes.size == 1 |
||
| 26 | |||
| 27 | 7558 | @graph = graph |
|
| 28 | 7558 | @path = graph.longest_path |
|
| 29 | 7558 | @path = nil unless @path.length == graph.size |
|
| 30 | |||
| 31 | 7558 | validate_graph |
|
| 32 | |||
| 33 | 7122 | build_hash saved_key |
|
| 34 | end |
||
| 35 | |||
| 36 | # Check if this index maps from the primary key to fields from one entity |
||
| 37 | # @return [Boolean] |
||
| 38 | 1 | def identity? |
|
| 39 | 2501 | @identity |
|
| 40 | end |
||
| 41 | |||
| 42 | # A simple key which uniquely identifies the index |
||
| 43 | # @return [String] |
||
| 44 | 1 | def key |
|
| 45 | 7364 | @key ||= "i#{Zlib.crc32 hash_str}" |
|
| 46 | end |
||
| 47 | |||
| 48 | # Look up a field in the index based on its ID |
||
| 49 | # @return [Fields::Field] |
||
| 50 | 1 | def [](field_id) |
|
| 51 | 6 | @all_fields.find { |field| field.id == field_id } |
|
| 52 | end |
||
| 53 | |||
| 54 | # Check if this index is an ID graph |
||
| 55 | # @return [Boolean] |
||
| 56 | 1 | def id_graph? |
|
| 57 | 45 | @hash_fields.all?(&:primary_key?) && @order_fields.all?(&:primary_key) |
|
| 58 | end |
||
| 59 | |||
| 60 | # Produce an index with the same fields but keyed by entities in the graph |
||
| 61 | 1 | def to_id_graph |
|
| 62 | 45 | return self if id_graph? |
|
| 63 | |||
| 64 | 17 | all_ids = (@hash_fields.to_a + @order_fields + @extra.to_a) |
|
| 65 | 83 | all_ids.map! { |f| f.parent.id_field }.uniq! |
|
| 66 | |||
| 67 | 17 | hash_fields = [all_ids.first] |
|
| 68 | 17 | order_fields = all_ids[1..-1] |
|
|
0 ignored issues
–
show
|
|||
| 69 | 17 | extra = @all_fields - hash_fields - order_fields |
|
| 70 | |||
| 71 | 17 | Index.new hash_fields, order_fields, extra, @graph |
|
| 72 | end |
||
| 73 | |||
| 74 | # :nocov: |
||
| 75 | 1 | def to_color |
|
| 76 | 26 | fields = [@hash_fields, @order_fields, @extra].map do |field_group| |
|
| 77 | 78 | '[' + field_group.map(&:inspect).join(', ') + ']' |
|
|
0 ignored issues
–
show
|
|||
| 78 | end |
||
| 79 | |||
| 80 | 26 | "[magenta]#{key}[/] #{fields[0]} #{fields[1]} → #{fields[2]}" \ |
|
| 81 | " [yellow]$#{size}[/]" \ |
||
| 82 | " [magenta]#{@graph.inspect}[/]" |
||
| 83 | end |
||
| 84 | # :nocov: |
||
| 85 | |||
| 86 | # Two indices are equal if they contain the same fields |
||
| 87 | # @return [Boolean] |
||
| 88 | 1 | def ==(other) |
|
| 89 | 7578 | hash == other.hash |
|
| 90 | end |
||
| 91 | 1 | alias eql? == |
|
| 92 | |||
| 93 | # Hash based on the fields, their keys, and the graph |
||
| 94 | # @return [String] |
||
| 95 | 1 | def hash_str |
|
| 96 | 14239 | @hash_str ||= [ |
|
| 97 | @hash_fields.map(&:id).sort!, |
||
| 98 | @order_fields.map(&:id), |
||
| 99 | @extra.map(&:id).sort!, |
||
| 100 | @graph.unique_edges.map(&:canonical_params).sort! |
||
| 101 | ].to_s.freeze |
||
| 102 | end |
||
| 103 | |||
| 104 | 1 | def hash |
|
| 105 | 35068 | @hash ||= Zlib.crc32 hash_str |
|
| 106 | end |
||
| 107 | |||
| 108 | # Check if the index contains a given field |
||
| 109 | # @return [Boolean] |
||
| 110 | 1 | def contains_field?(field) |
|
| 111 | 2 | @all_fields.include? field |
|
| 112 | end |
||
| 113 | |||
| 114 | 1 | private |
|
| 115 | |||
| 116 | # Initialize the hash function and freeze ourselves |
||
| 117 | # @return [void] |
||
| 118 | 1 | def build_hash(saved_key) |
|
| 119 | 7122 | @key = saved_key |
|
| 120 | |||
| 121 | 7122 | hash |
|
| 122 | 7122 | key |
|
| 123 | 7122 | calculate_size |
|
| 124 | 7122 | freeze |
|
| 125 | end |
||
| 126 | |||
| 127 | # Check for valid hash fields in an index |
||
| 128 | # @return [void] |
||
| 129 | 1 | def validate_hash_fields |
|
| 130 | fail InvalidIndexException, 'hash fields cannot be empty' \ |
||
| 131 | 7560 | if @hash_fields.empty? |
|
| 132 | |||
| 133 | 1 | fail InvalidIndexException, 'hash fields can only involve one entity' \ |
|
| 134 | 7559 | if @hash_fields.map(&:parent).to_set.size > 1 |
|
| 135 | end |
||
| 136 | |||
| 137 | # Ensure an index is nonempty |
||
| 138 | # @return [void] |
||
| 139 | 1 | def validate_nonempty |
|
| 140 | fail InvalidIndexException, 'must have fields other than hash fields' \ |
||
| 141 | if @order_fields.empty? && @extra.empty? |
||
| 142 | end |
||
| 143 | |||
| 144 | # Ensure an index and its fields correspond to a valid graph |
||
| 145 | # @return [void] |
||
| 146 | 1 | def validate_graph |
|
| 147 | 7558 | validate_graph_entities |
|
| 148 | 7122 | validate_graph_keys |
|
| 149 | end |
||
| 150 | |||
| 151 | # Ensure the graph of the index is valid |
||
| 152 | # @return [void] |
||
| 153 | 1 | def validate_graph_entities |
|
| 154 | 7558 | entities = @all_fields.map(&:parent).to_set |
|
| 155 | 436 | fail InvalidIndexException, 'graph entities do match index' \ |
|
| 156 | 7558 | unless entities == @graph.entities.to_set |
|
| 157 | end |
||
| 158 | |||
| 159 | # We must have the primary keys of the all entities in the graph |
||
| 160 | # @return [void] |
||
| 161 | 1 | def validate_graph_keys |
|
| 162 | fail InvalidIndexException, 'missing graph entity keys' \ |
||
| 163 | 7122 | unless @graph.entities.map(&:id_field).all? do |field| |
|
| 164 | 18064 | @hash_fields.include?(field) || @order_fields.include?(field) |
|
| 165 | end |
||
| 166 | end |
||
| 167 | |||
| 168 | # Precalculate the size of the index |
||
| 169 | # @return [void] |
||
| 170 | 1 | def calculate_size |
|
| 171 | 7122 | @hash_count = @hash_fields.product_by(&:cardinality) |
|
| 172 | |||
| 173 | # XXX This only works if foreign keys span all possible keys |
||
| 174 | # Take the maximum possible count at each join and multiply |
||
| 175 | 7122 | @entries = @graph.entities.map(&:count).max |
|
| 176 | 7122 | @per_hash_count = (@entries * 1.0 / @hash_count) |
|
| 177 | |||
| 178 | 7122 | @entry_size = @all_fields.sum_by(&:size) |
|
| 179 | 7122 | @size = @entries * @entry_size |
|
| 180 | end |
||
| 181 | end |
||
| 182 | |||
| 183 | # Thrown when something tries to create an invalid index |
||
| 184 | 1 | class InvalidIndexException < StandardError |
|
| 185 | end |
||
| 186 | |||
| 187 | # Allow entities to create their own indices |
||
| 188 | 1 | class Entity |
|
| 189 | # Create a simple index which maps entity keys to other fields |
||
| 190 | # @return [Index] |
||
| 191 | 1 | def simple_index |
|
| 192 | 36 | Index.new [id_field], [], fields.values - [id_field], |
|
| 193 | QueryGraph::Graph.from_path([id_field]), saved_key: name |
||
| 194 | end |
||
| 195 | end |
||
| 196 | |||
| 197 | # Allow statements to materialize views |
||
| 198 | 1 | class Statement |
|
| 199 | # Construct an index which acts as a materialized view for a query |
||
| 200 | # @return [Index] |
||
| 201 | 1 | def materialize_view |
|
| 202 | 152 | eq = materialized_view_eq join_order.first |
|
| 203 | 152 | order_fields = materialized_view_order(join_order.first) - eq |
|
| 204 | |||
| 205 | 152 | Index.new(eq, order_fields, |
|
| 206 | 152 | all_fields - (@eq_fields + @order).to_set, @graph) |
|
| 207 | end |
||
| 208 | |||
| 209 | 1 | private |
|
| 210 | |||
| 211 | # Get the fields used as parition keys for a materialized view |
||
| 212 | # based over a given entity |
||
| 213 | # @return [Array<Fields::Field>] |
||
| 214 | 1 | def materialized_view_eq(hash_entity) |
|
| 215 | 306 | eq = @eq_fields.select { |field| field.parent == hash_entity } |
|
| 216 | 152 | eq = [join_order.last.id_field] if eq.empty? |
|
| 217 | |||
| 218 | 152 | eq |
|
| 219 | end |
||
| 220 | |||
| 221 | # Get the ordered keys for a materialized view |
||
| 222 | # @return [Array<Fields::Field>] |
||
| 223 | 1 | def materialized_view_order(hash_entity) |
|
| 224 | # Start the ordered fields with the equality predicates |
||
| 225 | # on other entities, followed by all of the attributes |
||
| 226 | # used in ordering, then the range field |
||
| 227 | 152 | order_fields = @eq_fields.select do |field| |
|
|
0 ignored issues
–
show
|
|||
| 228 | 154 | field.parent != hash_entity |
|
| 229 | end + @order |
||
| 230 | 152 | if @range_field && [email protected]?(@range_field) |
|
|
0 ignored issues
–
show
|
|||
| 231 | 4 | order_fields << @range_field |
|
| 232 | end |
||
| 233 | |||
| 234 | # Ensure we include IDs of the final entity |
||
| 235 | 152 | order_fields += join_order.map(&:id_field) |
|
| 236 | |||
| 237 | 152 | order_fields.uniq |
|
| 238 | end |
||
| 239 | end |
||
| 240 | end |
||
| 241 |