michaelmior /
NoSE
| 1 | # frozen_string_literal: true |
||
| 2 | |||
| 3 | 1 | require_relative 'search/constraints' |
|
| 4 | 1 | require_relative 'search/problem' |
|
| 5 | 1 | require_relative 'search/results' |
|
| 6 | |||
| 7 | 1 | require 'logging' |
|
| 8 | 1 | require 'ostruct' |
|
| 9 | 1 | require 'tempfile' |
|
| 10 | |||
| 11 | 1 | module NoSE |
|
| 12 | # ILP construction and schema search |
||
| 13 | 1 | module Search |
|
| 14 | # Searches for the optimal indices for a given workload |
||
| 15 | 1 | class Search |
|
| 16 | 1 | def initialize(workload, cost_model, objective = Objective::COST, |
|
| 17 | by_id_graph = false) |
||
|
0 ignored issues
–
show
introduced
by
Loading history...
|
|||
| 18 | 6 | @logger = Logging.logger['nose::search'] |
|
| 19 | 6 | @workload = workload |
|
| 20 | 6 | @cost_model = cost_model |
|
| 21 | 6 | @objective = objective |
|
| 22 | 6 | @by_id_graph = by_id_graph |
|
| 23 | |||
| 24 | # For now we only support optimization based on cost when grouping by |
||
| 25 | # ID graphs, but support for other objectives is still feasible |
||
| 26 | fail 'Only cost-based optimization allowed when using ID graphs' \ |
||
| 27 | 6 | if @by_id_graph && objective != Objective::COST |
|
| 28 | end |
||
| 29 | |||
| 30 | # Search for optimal indices using an ILP which searches for |
||
| 31 | # non-overlapping indices |
||
| 32 | # @return [Results] |
||
| 33 | 1 | def search_overlap(indexes, max_space = Float::INFINITY) |
|
| 34 | 6 | return if indexes.empty? |
|
| 35 | |||
| 36 | # Get the costs of all queries and updates |
||
| 37 | 6 | query_weights = combine_query_weights indexes |
|
| 38 | 6 | costs, trees = query_costs query_weights, indexes |
|
| 39 | 6 | update_costs, update_plans = update_costs trees, indexes |
|
| 40 | |||
| 41 | 6 | log_search_start costs, query_weights |
|
| 42 | |||
| 43 | solver_params = { |
||
| 44 | 6 | max_space: max_space, |
|
| 45 | costs: costs, |
||
| 46 | update_costs: update_costs, |
||
| 47 | cost_model: @cost_model, |
||
| 48 | by_id_graph: @by_id_graph |
||
| 49 | } |
||
| 50 | 6 | search_result query_weights, indexes, solver_params, trees, |
|
| 51 | update_plans |
||
| 52 | end |
||
| 53 | |||
| 54 | 1 | private |
|
| 55 | |||
| 56 | # Combine the weights of queries and statements |
||
| 57 | # @return [void] |
||
| 58 | 1 | def combine_query_weights(indexes) |
|
| 59 | 6 | indexes = indexes.map(&:to_id_graph).uniq if @by_id_graph |
|
| 60 | 6 | query_weights = Hash[@workload.support_queries(indexes).map do |query| |
|
| 61 | 14 | [query, @workload.statement_weights[query.statement]] |
|
| 62 | end] |
||
| 63 | 6 | query_weights.merge!(@workload.statement_weights.select do |stmt, _| |
|
| 64 | 9 | stmt.is_a? Query |
|
| 65 | end.to_h) |
||
| 66 | |||
| 67 | 6 | query_weights |
|
| 68 | end |
||
| 69 | |||
| 70 | # Produce a useful log message before starting the search |
||
| 71 | # @return [void] |
||
| 72 | 1 | def log_search_start(costs, query_weights) |
|
| 73 | 6 | @logger.debug do |
|
| 74 | "Costs: \n" + pp_s(costs) + "\n" \ |
||
|
0 ignored issues
–
show
|
|||
| 75 | "Search with queries:\n" + \ |
||
| 76 | query_weights.each_key.map.with_index do |query, i| |
||
| 77 | "#{i} #{query.inspect}" |
||
| 78 | end.join("\n") |
||
| 79 | end |
||
| 80 | end |
||
| 81 | |||
| 82 | # Run the solver and get the results of search |
||
| 83 | # @return [Results] |
||
| 84 | 1 | def search_result(query_weights, indexes, solver_params, trees, |
|
| 85 | update_plans) |
||
| 86 | # Solve the LP using MIPPeR |
||
| 87 | 6 | result = solve_mipper query_weights.keys, indexes, **solver_params |
|
| 88 | |||
| 89 | 4 | result.workload = @workload |
|
| 90 | 4 | result.plans_from_trees trees |
|
| 91 | |||
| 92 | # Select the relevant update plans |
||
| 93 | 4 | update_plans = update_plans.values.flatten(1).select do |plan| |
|
| 94 | 21 | result.indexes.include? plan.index |
|
| 95 | end |
||
| 96 | 4 | update_plans.each do |plan| |
|
| 97 | 2 | plan.select_query_plans(&result.method(:select_plan)) |
|
| 98 | end |
||
| 99 | 4 | result.update_plans = update_plans |
|
| 100 | |||
| 101 | 4 | result.cost_model = @cost_model |
|
| 102 | 4 | result.validate |
|
| 103 | |||
| 104 | 4 | result |
|
| 105 | end |
||
| 106 | |||
| 107 | # Select the plans to use for a given set of indexes |
||
| 108 | # @return [Array<Plans::QueryPlan>] |
||
| 109 | 1 | def select_plans(trees, indexes) |
|
| 110 | trees.map do |tree| |
||
| 111 | # Exclude support queries since they will be in update plans |
||
| 112 | query = tree.query |
||
| 113 | next if query.is_a?(SupportQuery) |
||
| 114 | |||
| 115 | # Select the exact plan to use for these indexes |
||
| 116 | tree.select_using_indexes(indexes).min_by(&:cost) |
||
| 117 | end.compact |
||
| 118 | end |
||
| 119 | |||
| 120 | # Solve the index selection problem using MIPPeR |
||
| 121 | # @return [Results] |
||
| 122 | 1 | def solve_mipper(queries, indexes, data) |
|
| 123 | # Construct and solve the ILP |
||
| 124 | 6 | problem = Problem.new queries, @workload.updates, data, @objective |
|
| 125 | 6 | problem.solve |
|
| 126 | |||
| 127 | # We won't get here if there's no valdi solution |
||
| 128 | 4 | @logger.debug 'Found solution with total cost ' \ |
|
| 129 | "#{problem.objective_value}" |
||
| 130 | |||
| 131 | # Return the selected indices |
||
| 132 | 4 | selected_indexes = problem.selected_indexes |
|
| 133 | |||
| 134 | 4 | @logger.debug do |
|
| 135 | "Selected indexes:\n" + selected_indexes.map do |index| |
||
|
0 ignored issues
–
show
|
|||
| 136 | "#{indexes.index index} #{index.inspect}" |
||
| 137 | end.join("\n") |
||
| 138 | end |
||
| 139 | |||
| 140 | 4 | problem.result |
|
| 141 | end |
||
| 142 | |||
| 143 | # Produce the cost of updates in the workload |
||
| 144 | 1 | def update_costs(trees, indexes) |
|
| 145 | 6 | planner = Plans::UpdatePlanner.new @workload.model, trees, @cost_model, |
|
| 146 | @by_id_graph |
||
| 147 | 8 | update_costs = Hash.new { |h, k| h[k] = {} } |
|
| 148 | 8 | update_plans = Hash.new { |h, k| h[k] = [] } |
|
| 149 | 6 | @workload.statements.each do |statement| |
|
| 150 | 9 | next if statement.is_a? Query |
|
| 151 | |||
| 152 | 2 | populate_update_costs planner, statement, indexes, |
|
| 153 | update_costs, update_plans |
||
| 154 | end |
||
| 155 | |||
| 156 | 6 | [update_costs, update_plans] |
|
| 157 | end |
||
| 158 | |||
| 159 | # Populate the cost of all necessary plans for the given satement |
||
| 160 | # @return [void] |
||
| 161 | 1 | def populate_update_costs(planner, statement, indexes, |
|
| 162 | update_costs, update_plans) |
||
| 163 | 2 | planner.find_plans_for_update(statement, indexes).each do |plan| |
|
| 164 | 21 | weight = @workload.statement_weights[statement] |
|
| 165 | 21 | update_costs[statement][plan.index] = plan.update_cost * weight |
|
| 166 | 21 | update_plans[statement] << plan |
|
| 167 | end |
||
| 168 | end |
||
| 169 | |||
| 170 | # Get the cost of using each index for each query in a workload |
||
| 171 | 1 | def query_costs(query_weights, indexes) |
|
| 172 | 6 | planner = Plans::QueryPlanner.new @workload, indexes, @cost_model |
|
| 173 | |||
| 174 | 6 | results = Parallel.map(query_weights) do |query, weight| |
|
| 175 | 21 | query_cost planner, query, weight |
|
| 176 | end |
||
| 177 | 6 | costs = Hash[query_weights.each_key.map.with_index do |query, q| |
|
| 178 | 21 | [query, results[q].first] |
|
| 179 | end] |
||
| 180 | |||
| 181 | 6 | [costs, results.map(&:last)] |
|
| 182 | end |
||
| 183 | |||
| 184 | # Get the cost for indices for an individual query |
||
| 185 | 1 | def query_cost(planner, query, weight) |
|
| 186 | 21 | query_costs = {} |
|
| 187 | |||
| 188 | 21 | tree = planner.find_plans_for_query(query) |
|
| 189 | 21 | tree.each do |plan| |
|
| 190 | 109 | steps_by_index = [] |
|
| 191 | 109 | plan.each do |step| |
|
| 192 | 128 | if step.is_a? Plans::IndexLookupPlanStep |
|
| 193 | 122 | steps_by_index.push [step] |
|
| 194 | else |
||
| 195 | 6 | steps_by_index.last.push step |
|
| 196 | end |
||
| 197 | end |
||
| 198 | |||
| 199 | 109 | populate_query_costs query_costs, steps_by_index, weight, query, tree |
|
| 200 | end |
||
| 201 | |||
| 202 | 21 | [query_costs, tree] |
|
| 203 | end |
||
| 204 | |||
| 205 | # Store the costs and indexes for this plan in a nested hash |
||
| 206 | # @return [void] |
||
| 207 | 1 | def populate_query_costs(query_costs, steps_by_index, weight, |
|
|
0 ignored issues
–
show
|
|||
| 208 | query, tree) |
||
| 209 | # The first key is the query and the second is the index |
||
| 210 | # |
||
| 211 | # The value is a two-element array with the indices which are |
||
| 212 | # jointly used to answer a step in the query plan along with |
||
| 213 | # the cost of all plan steps for the part of the query graph |
||
| 214 | 109 | steps_by_index.each do |steps| |
|
|
0 ignored issues
–
show
|
|||
| 215 | # Get the indexes for these plan steps |
||
| 216 | 122 | index_step = steps.first |
|
| 217 | |||
| 218 | # Calculate the cost for just these steps in the plan |
||
| 219 | 122 | cost = steps.sum_by(&:cost) * weight |
|
| 220 | |||
| 221 | # Don't count the cost for sorting at the end |
||
| 222 | 250 | sort_step = steps.find { |s| s.is_a? Plans::SortPlanStep } |
|
| 223 | 122 | cost -= sort_step.cost * weight unless sort_step.nil? |
|
| 224 | |||
| 225 | 122 | if query_costs.key? index_step.index |
|
| 226 | 10 | current_cost = query_costs[index_step.index].last |
|
| 227 | |||
| 228 | # We must always have the same cost |
||
| 229 | 10 | if (current_cost - cost).abs >= 10E-6 |
|
| 230 | index = index_step.index |
||
| 231 | p query |
||
| 232 | puts "Index #{index.key} does not have equivalent cost" |
||
| 233 | puts "Current cost: #{current_cost}, discovered cost: #{cost}" |
||
| 234 | |||
| 235 | puts "\nCurrent steps" |
||
| 236 | query_costs[index_step.index].first.each { |s| p s } |
||
| 237 | |||
| 238 | puts "\nDiscovered steps" |
||
| 239 | steps.each { |s| p s } |
||
| 240 | puts |
||
| 241 | |||
| 242 | puts '=======================================' |
||
| 243 | tree.sort_by(&:cost).each do |plan| |
||
| 244 | next unless plan.indexes.include?(index_step.index) |
||
|
0 ignored issues
–
show
|
|||
| 245 | plan.each do |step| |
||
| 246 | print(format('%.3f', step.cost).rjust(7) + ' ') |
||
|
0 ignored issues
–
show
|
|||
| 247 | p step |
||
| 248 | end |
||
| 249 | puts "#{format('%.3f', plan.cost).rjust(7)} total" |
||
| 250 | puts '=======================================' |
||
| 251 | end |
||
| 252 | |||
| 253 | puts |
||
| 254 | p tree |
||
| 255 | |||
| 256 | fail |
||
| 257 | end |
||
| 258 | else |
||
| 259 | # We either found a new plan or something cheaper |
||
| 260 | 112 | query_costs[index_step.index] = [steps, cost] |
|
| 261 | end |
||
| 262 | end |
||
| 263 | end |
||
| 264 | end |
||
| 265 | end |
||
| 266 | end |
||
| 267 |