Issues (393)

lib/nose/search.rb (10 issues)

1
# frozen_string_literal: true
2
3 1
require_relative 'search/constraints'
4 1
require_relative 'search/problem'
5 1
require_relative 'search/results'
6
7 1
require 'logging'
8 1
require 'ostruct'
9 1
require 'tempfile'
10
11 1
module NoSE
12
  # ILP construction and schema search
13 1
  module Search
14
    # Searches for the optimal indices for a given workload
15 1
    class Search
16 1
      def initialize(workload, cost_model, objective = Objective::COST,
17
                     by_id_graph = false)
0 ignored issues
show
Prefer keyword arguments for arguments with a boolean default value; use by_id_graph: false instead of by_id_graph = false.
Loading history...
18 6
        @logger = Logging.logger['nose::search']
19 6
        @workload = workload
20 6
        @cost_model = cost_model
21 6
        @objective = objective
22 6
        @by_id_graph = by_id_graph
23
24
        # For now we only support optimization based on cost when grouping by
25
        # ID graphs, but support for other objectives is still feasible
26
        fail 'Only cost-based optimization allowed when using ID graphs' \
27 6
          if @by_id_graph && objective != Objective::COST
28
      end
29
30
      # Search for optimal indices using an ILP which searches for
31
      # non-overlapping indices
32
      # @return [Results]
33 1
      def search_overlap(indexes, max_space = Float::INFINITY)
34 6
        return if indexes.empty?
35
36
        # Get the costs of all queries and updates
37 6
        query_weights = combine_query_weights indexes
38 6
        costs, trees = query_costs query_weights, indexes
39 6
        update_costs, update_plans = update_costs trees, indexes
40
41 6
        log_search_start costs, query_weights
42
43
        solver_params = {
44 6
          max_space: max_space,
45
          costs: costs,
46
          update_costs: update_costs,
47
          cost_model: @cost_model,
48
          by_id_graph: @by_id_graph
49
        }
50 6
        search_result query_weights, indexes, solver_params, trees,
51
                      update_plans
52
      end
53
54 1
      private
55
56
      # Combine the weights of queries and statements
57
      # @return [void]
58 1
      def combine_query_weights(indexes)
59 6
        indexes = indexes.map(&:to_id_graph).uniq if @by_id_graph
60 6
        query_weights = Hash[@workload.support_queries(indexes).map do |query|
61 14
          [query, @workload.statement_weights[query.statement]]
62
        end]
63 6
        query_weights.merge!(@workload.statement_weights.select do |stmt, _|
64 9
          stmt.is_a? Query
65
        end.to_h)
66
67 6
        query_weights
68
      end
69
70
      # Produce a useful log message before starting the search
71
      # @return [void]
72 1
      def log_search_start(costs, query_weights)
73 6
        @logger.debug do
74
          "Costs: \n" + pp_s(costs) + "\n" \
0 ignored issues
show
Prefer string interpolation to string concatenation.
Loading history...
75
            "Search with queries:\n" + \
76
            query_weights.each_key.map.with_index do |query, i|
77
              "#{i} #{query.inspect}"
78
            end.join("\n")
79
        end
80
      end
81
82
      # Run the solver and get the results of search
83
      # @return [Results]
84 1
      def search_result(query_weights, indexes, solver_params, trees,
85
                        update_plans)
86
        # Solve the LP using MIPPeR
87 6
        result = solve_mipper query_weights.keys, indexes, **solver_params
88
89 4
        result.workload = @workload
90 4
        result.plans_from_trees trees
91
92
        # Select the relevant update plans
93 4
        update_plans = update_plans.values.flatten(1).select do |plan|
94 21
          result.indexes.include? plan.index
95
        end
96 4
        update_plans.each do |plan|
97 2
          plan.select_query_plans(&result.method(:select_plan))
98
        end
99 4
        result.update_plans = update_plans
100
101 4
        result.cost_model = @cost_model
102 4
        result.validate
103
104 4
        result
105
      end
106
107
      # Select the plans to use for a given set of indexes
108
      # @return [Array<Plans::QueryPlan>]
109 1
      def select_plans(trees, indexes)
110
        trees.map do |tree|
111
          # Exclude support queries since they will be in update plans
112
          query = tree.query
113
          next if query.is_a?(SupportQuery)
114
115
          # Select the exact plan to use for these indexes
116
          tree.select_using_indexes(indexes).min_by(&:cost)
117
        end.compact
118
      end
119
120
      # Solve the index selection problem using MIPPeR
121
      # @return [Results]
122 1
      def solve_mipper(queries, indexes, data)
123
        # Construct and solve the ILP
124 6
        problem = Problem.new queries, @workload.updates, data, @objective
125 6
        problem.solve
126
127
        # We won't get here if there's no valdi solution
128 4
        @logger.debug 'Found solution with total cost ' \
129
                      "#{problem.objective_value}"
130
131
        # Return the selected indices
132 4
        selected_indexes = problem.selected_indexes
133
134 4
        @logger.debug do
135
          "Selected indexes:\n" + selected_indexes.map do |index|
0 ignored issues
show
Prefer string interpolation to string concatenation.
Loading history...
136
            "#{indexes.index index} #{index.inspect}"
137
          end.join("\n")
138
        end
139
140 4
        problem.result
141
      end
142
143
      # Produce the cost of updates in the workload
144 1
      def update_costs(trees, indexes)
145 6
        planner = Plans::UpdatePlanner.new @workload.model, trees, @cost_model,
146
                                           @by_id_graph
147 8
        update_costs = Hash.new { |h, k| h[k] = {} }
148 8
        update_plans = Hash.new { |h, k| h[k] = [] }
149 6
        @workload.statements.each do |statement|
150 9
          next if statement.is_a? Query
151
152 2
          populate_update_costs planner, statement, indexes,
153
                                update_costs, update_plans
154
        end
155
156 6
        [update_costs, update_plans]
157
      end
158
159
      # Populate the cost of all necessary plans for the given satement
160
      # @return [void]
161 1
      def populate_update_costs(planner, statement, indexes,
162
                                update_costs, update_plans)
163 2
        planner.find_plans_for_update(statement, indexes).each do |plan|
164 21
          weight = @workload.statement_weights[statement]
165 21
          update_costs[statement][plan.index] = plan.update_cost * weight
166 21
          update_plans[statement] << plan
167
        end
168
      end
169
170
      # Get the cost of using each index for each query in a workload
171 1
      def query_costs(query_weights, indexes)
172 6
        planner = Plans::QueryPlanner.new @workload, indexes, @cost_model
173
174 6
        results = Parallel.map(query_weights) do |query, weight|
175 21
          query_cost planner, query, weight
176
        end
177 6
        costs = Hash[query_weights.each_key.map.with_index do |query, q|
178 21
          [query, results[q].first]
179
        end]
180
181 6
        [costs, results.map(&:last)]
182
      end
183
184
      # Get the cost for indices for an individual query
185 1
      def query_cost(planner, query, weight)
186 21
        query_costs = {}
187
188 21
        tree = planner.find_plans_for_query(query)
189 21
        tree.each do |plan|
190 109
          steps_by_index = []
191 109
          plan.each do |step|
192 128
            if step.is_a? Plans::IndexLookupPlanStep
193 122
              steps_by_index.push [step]
194
            else
195 6
              steps_by_index.last.push step
196
            end
197
          end
198
199 109
          populate_query_costs query_costs, steps_by_index, weight, query, tree
200
        end
201
202 21
        [query_costs, tree]
203
      end
204
205
      # Store the costs and indexes for this plan in a nested hash
206
      # @return [void]
207 1
      def populate_query_costs(query_costs, steps_by_index, weight,
0 ignored issues
show
The Assignment, Branch, Condition size for populate_query_costs is considered too high. [<14, 54, 13> 57.28/20]. The ABC size is based on assignments, branches (method calls), and conditions.
Loading history...
The method populate_query_costs seems to be too complex. Perceived cyclomatic complexity is 12 with a maxiumum of 10 permitted.
Loading history...
This method is 35 lines long. Your coding style permits a maximum length of 20.
Loading history...
Complexity Coding Style introduced by
The method populate_query_costs seems to be too complex. Perceived complexity is 13 with a maxiumum of 10 permitted.
Loading history...
208
                               query, tree)
209
        # The first key is the query and the second is the index
210
        #
211
        # The value is a two-element array with the indices which are
212
        # jointly used to answer a step in the query plan along with
213
        # the cost of all plan steps for the part of the query graph
214 109
        steps_by_index.each do |steps|
0 ignored issues
show
Block has too many lines. [33/25]
Loading history...
215
          # Get the indexes for these plan steps
216 122
          index_step = steps.first
217
218
          # Calculate the cost for just these steps in the plan
219 122
          cost = steps.sum_by(&:cost) * weight
220
221
          # Don't count the cost for sorting at the end
222 250
          sort_step = steps.find { |s| s.is_a? Plans::SortPlanStep }
223 122
          cost -= sort_step.cost * weight unless sort_step.nil?
224
225 122
          if query_costs.key? index_step.index
226 10
            current_cost = query_costs[index_step.index].last
227
228
            # We must always have the same cost
229 10
            if (current_cost - cost).abs >= 10E-6
230
              index = index_step.index
231
              p query
232
              puts "Index #{index.key} does not have equivalent cost"
233
              puts "Current cost: #{current_cost}, discovered cost: #{cost}"
234
235
              puts "\nCurrent steps"
236
              query_costs[index_step.index].first.each { |s| p s }
237
238
              puts "\nDiscovered steps"
239
              steps.each { |s| p s }
240
              puts
241
242
              puts '======================================='
243
              tree.sort_by(&:cost).each do |plan|
244
                next unless plan.indexes.include?(index_step.index)
0 ignored issues
show
Add empty line after guard clause.
Loading history...
245
                plan.each do |step|
246
                  print(format('%.3f', step.cost).rjust(7) + ' ')
0 ignored issues
show
Prefer string interpolation to string concatenation.
Loading history...
247
                  p step
248
                end
249
                puts "#{format('%.3f', plan.cost).rjust(7)} total"
250
                puts '======================================='
251
              end
252
253
              puts
254
              p tree
255
256
              fail
257
            end
258
          else
259
            # We either found a new plan or something cheaper
260 112
            query_costs[index_step.index] = [steps, cost]
261
          end
262
        end
263
      end
264
    end
265
  end
266
end
267