1 | # frozen_string_literal: true |
||
2 | |||
3 | 1 | require_relative 'search/constraints' |
|
4 | 1 | require_relative 'search/problem' |
|
5 | 1 | require_relative 'search/results' |
|
6 | |||
7 | 1 | require 'logging' |
|
8 | 1 | require 'ostruct' |
|
9 | 1 | require 'tempfile' |
|
10 | |||
11 | 1 | module NoSE |
|
12 | # ILP construction and schema search |
||
13 | 1 | module Search |
|
14 | # Searches for the optimal indices for a given workload |
||
15 | 1 | class Search |
|
16 | 1 | def initialize(workload, cost_model, objective = Objective::COST, |
|
17 | by_id_graph = false) |
||
0 ignored issues
–
show
introduced
by
![]() |
|||
18 | 6 | @logger = Logging.logger['nose::search'] |
|
19 | 6 | @workload = workload |
|
20 | 6 | @cost_model = cost_model |
|
21 | 6 | @objective = objective |
|
22 | 6 | @by_id_graph = by_id_graph |
|
23 | |||
24 | # For now we only support optimization based on cost when grouping by |
||
25 | # ID graphs, but support for other objectives is still feasible |
||
26 | fail 'Only cost-based optimization allowed when using ID graphs' \ |
||
27 | 6 | if @by_id_graph && objective != Objective::COST |
|
28 | end |
||
29 | |||
30 | # Search for optimal indices using an ILP which searches for |
||
31 | # non-overlapping indices |
||
32 | # @return [Results] |
||
33 | 1 | def search_overlap(indexes, max_space = Float::INFINITY) |
|
34 | 6 | return if indexes.empty? |
|
35 | |||
36 | # Get the costs of all queries and updates |
||
37 | 6 | query_weights = combine_query_weights indexes |
|
38 | 6 | costs, trees = query_costs query_weights, indexes |
|
39 | 6 | update_costs, update_plans = update_costs trees, indexes |
|
40 | |||
41 | 6 | log_search_start costs, query_weights |
|
42 | |||
43 | solver_params = { |
||
44 | 6 | max_space: max_space, |
|
45 | costs: costs, |
||
46 | update_costs: update_costs, |
||
47 | cost_model: @cost_model, |
||
48 | by_id_graph: @by_id_graph |
||
49 | } |
||
50 | 6 | search_result query_weights, indexes, solver_params, trees, |
|
51 | update_plans |
||
52 | end |
||
53 | |||
54 | 1 | private |
|
55 | |||
56 | # Combine the weights of queries and statements |
||
57 | # @return [void] |
||
58 | 1 | def combine_query_weights(indexes) |
|
59 | 6 | indexes = indexes.map(&:to_id_graph).uniq if @by_id_graph |
|
60 | 6 | query_weights = Hash[@workload.support_queries(indexes).map do |query| |
|
61 | 14 | [query, @workload.statement_weights[query.statement]] |
|
62 | end] |
||
63 | 6 | query_weights.merge!(@workload.statement_weights.select do |stmt, _| |
|
64 | 9 | stmt.is_a? Query |
|
65 | end.to_h) |
||
66 | |||
67 | 6 | query_weights |
|
68 | end |
||
69 | |||
70 | # Produce a useful log message before starting the search |
||
71 | # @return [void] |
||
72 | 1 | def log_search_start(costs, query_weights) |
|
73 | 6 | @logger.debug do |
|
74 | "Costs: \n" + pp_s(costs) + "\n" \ |
||
0 ignored issues
–
show
|
|||
75 | "Search with queries:\n" + \ |
||
76 | query_weights.each_key.map.with_index do |query, i| |
||
77 | "#{i} #{query.inspect}" |
||
78 | end.join("\n") |
||
79 | end |
||
80 | end |
||
81 | |||
82 | # Run the solver and get the results of search |
||
83 | # @return [Results] |
||
84 | 1 | def search_result(query_weights, indexes, solver_params, trees, |
|
85 | update_plans) |
||
86 | # Solve the LP using MIPPeR |
||
87 | 6 | result = solve_mipper query_weights.keys, indexes, **solver_params |
|
88 | |||
89 | 4 | result.workload = @workload |
|
90 | 4 | result.plans_from_trees trees |
|
91 | |||
92 | # Select the relevant update plans |
||
93 | 4 | update_plans = update_plans.values.flatten(1).select do |plan| |
|
94 | 21 | result.indexes.include? plan.index |
|
95 | end |
||
96 | 4 | update_plans.each do |plan| |
|
97 | 2 | plan.select_query_plans(&result.method(:select_plan)) |
|
98 | end |
||
99 | 4 | result.update_plans = update_plans |
|
100 | |||
101 | 4 | result.cost_model = @cost_model |
|
102 | 4 | result.validate |
|
103 | |||
104 | 4 | result |
|
105 | end |
||
106 | |||
107 | # Select the plans to use for a given set of indexes |
||
108 | # @return [Array<Plans::QueryPlan>] |
||
109 | 1 | def select_plans(trees, indexes) |
|
110 | trees.map do |tree| |
||
111 | # Exclude support queries since they will be in update plans |
||
112 | query = tree.query |
||
113 | next if query.is_a?(SupportQuery) |
||
114 | |||
115 | # Select the exact plan to use for these indexes |
||
116 | tree.select_using_indexes(indexes).min_by(&:cost) |
||
117 | end.compact |
||
118 | end |
||
119 | |||
120 | # Solve the index selection problem using MIPPeR |
||
121 | # @return [Results] |
||
122 | 1 | def solve_mipper(queries, indexes, data) |
|
123 | # Construct and solve the ILP |
||
124 | 6 | problem = Problem.new queries, @workload.updates, data, @objective |
|
125 | 6 | problem.solve |
|
126 | |||
127 | # We won't get here if there's no valdi solution |
||
128 | 4 | @logger.debug 'Found solution with total cost ' \ |
|
129 | "#{problem.objective_value}" |
||
130 | |||
131 | # Return the selected indices |
||
132 | 4 | selected_indexes = problem.selected_indexes |
|
133 | |||
134 | 4 | @logger.debug do |
|
135 | "Selected indexes:\n" + selected_indexes.map do |index| |
||
0 ignored issues
–
show
|
|||
136 | "#{indexes.index index} #{index.inspect}" |
||
137 | end.join("\n") |
||
138 | end |
||
139 | |||
140 | 4 | problem.result |
|
141 | end |
||
142 | |||
143 | # Produce the cost of updates in the workload |
||
144 | 1 | def update_costs(trees, indexes) |
|
145 | 6 | planner = Plans::UpdatePlanner.new @workload.model, trees, @cost_model, |
|
146 | @by_id_graph |
||
147 | 8 | update_costs = Hash.new { |h, k| h[k] = {} } |
|
148 | 8 | update_plans = Hash.new { |h, k| h[k] = [] } |
|
149 | 6 | @workload.statements.each do |statement| |
|
150 | 9 | next if statement.is_a? Query |
|
151 | |||
152 | 2 | populate_update_costs planner, statement, indexes, |
|
153 | update_costs, update_plans |
||
154 | end |
||
155 | |||
156 | 6 | [update_costs, update_plans] |
|
157 | end |
||
158 | |||
159 | # Populate the cost of all necessary plans for the given satement |
||
160 | # @return [void] |
||
161 | 1 | def populate_update_costs(planner, statement, indexes, |
|
162 | update_costs, update_plans) |
||
163 | 2 | planner.find_plans_for_update(statement, indexes).each do |plan| |
|
164 | 21 | weight = @workload.statement_weights[statement] |
|
165 | 21 | update_costs[statement][plan.index] = plan.update_cost * weight |
|
166 | 21 | update_plans[statement] << plan |
|
167 | end |
||
168 | end |
||
169 | |||
170 | # Get the cost of using each index for each query in a workload |
||
171 | 1 | def query_costs(query_weights, indexes) |
|
172 | 6 | planner = Plans::QueryPlanner.new @workload, indexes, @cost_model |
|
173 | |||
174 | 6 | results = Parallel.map(query_weights) do |query, weight| |
|
175 | 21 | query_cost planner, query, weight |
|
176 | end |
||
177 | 6 | costs = Hash[query_weights.each_key.map.with_index do |query, q| |
|
178 | 21 | [query, results[q].first] |
|
179 | end] |
||
180 | |||
181 | 6 | [costs, results.map(&:last)] |
|
182 | end |
||
183 | |||
184 | # Get the cost for indices for an individual query |
||
185 | 1 | def query_cost(planner, query, weight) |
|
186 | 21 | query_costs = {} |
|
187 | |||
188 | 21 | tree = planner.find_plans_for_query(query) |
|
189 | 21 | tree.each do |plan| |
|
190 | 109 | steps_by_index = [] |
|
191 | 109 | plan.each do |step| |
|
192 | 128 | if step.is_a? Plans::IndexLookupPlanStep |
|
193 | 122 | steps_by_index.push [step] |
|
194 | else |
||
195 | 6 | steps_by_index.last.push step |
|
196 | end |
||
197 | end |
||
198 | |||
199 | 109 | populate_query_costs query_costs, steps_by_index, weight, query, tree |
|
200 | end |
||
201 | |||
202 | 21 | [query_costs, tree] |
|
203 | end |
||
204 | |||
205 | # Store the costs and indexes for this plan in a nested hash |
||
206 | # @return [void] |
||
207 | 1 | def populate_query_costs(query_costs, steps_by_index, weight, |
|
0 ignored issues
–
show
|
|||
208 | query, tree) |
||
209 | # The first key is the query and the second is the index |
||
210 | # |
||
211 | # The value is a two-element array with the indices which are |
||
212 | # jointly used to answer a step in the query plan along with |
||
213 | # the cost of all plan steps for the part of the query graph |
||
214 | 109 | steps_by_index.each do |steps| |
|
0 ignored issues
–
show
|
|||
215 | # Get the indexes for these plan steps |
||
216 | 122 | index_step = steps.first |
|
217 | |||
218 | # Calculate the cost for just these steps in the plan |
||
219 | 122 | cost = steps.sum_by(&:cost) * weight |
|
220 | |||
221 | # Don't count the cost for sorting at the end |
||
222 | 250 | sort_step = steps.find { |s| s.is_a? Plans::SortPlanStep } |
|
223 | 122 | cost -= sort_step.cost * weight unless sort_step.nil? |
|
224 | |||
225 | 122 | if query_costs.key? index_step.index |
|
226 | 10 | current_cost = query_costs[index_step.index].last |
|
227 | |||
228 | # We must always have the same cost |
||
229 | 10 | if (current_cost - cost).abs >= 10E-6 |
|
230 | index = index_step.index |
||
231 | p query |
||
232 | puts "Index #{index.key} does not have equivalent cost" |
||
233 | puts "Current cost: #{current_cost}, discovered cost: #{cost}" |
||
234 | |||
235 | puts "\nCurrent steps" |
||
236 | query_costs[index_step.index].first.each { |s| p s } |
||
237 | |||
238 | puts "\nDiscovered steps" |
||
239 | steps.each { |s| p s } |
||
240 | puts |
||
241 | |||
242 | puts '=======================================' |
||
243 | tree.sort_by(&:cost).each do |plan| |
||
244 | next unless plan.indexes.include?(index_step.index) |
||
0 ignored issues
–
show
|
|||
245 | plan.each do |step| |
||
246 | print(format('%.3f', step.cost).rjust(7) + ' ') |
||
0 ignored issues
–
show
|
|||
247 | p step |
||
248 | end |
||
249 | puts "#{format('%.3f', plan.cost).rjust(7)} total" |
||
250 | puts '=======================================' |
||
251 | end |
||
252 | |||
253 | puts |
||
254 | p tree |
||
255 | |||
256 | fail |
||
257 | end |
||
258 | else |
||
259 | # We either found a new plan or something cheaper |
||
260 | 112 | query_costs[index_step.index] = [steps, cost] |
|
261 | end |
||
262 | end |
||
263 | end |
||
264 | end |
||
265 | end |
||
266 | end |
||
267 |