Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
27 | class Search extends SearchDbConnected |
||
28 | { |
||
29 | /** |
||
30 | * @var Tokenizer |
||
31 | */ |
||
32 | protected $tokenizer; |
||
33 | protected $results = array(); |
||
34 | |||
35 | /** |
||
36 | * An array containing classes implementing \library\search\Filters |
||
37 | * These will be applied to all tokenizers |
||
38 | * @var array |
||
39 | */ |
||
40 | protected $filters = array( |
||
41 | 'DutchStopWords', |
||
42 | 'EnglishStopWords' |
||
43 | ); |
||
44 | |||
45 | /** |
||
46 | * Returns an array of SearchResult and / or SearchSuggestion objects, |
||
47 | * based on the tokens in the Tokenizer |
||
48 | * @param Tokenizer $tokenizer |
||
49 | * |
||
50 | * @return array |
||
51 | */ |
||
52 | public function getDocumentsForTokenizer(Tokenizer $tokenizer) |
||
53 | { |
||
54 | $this->tokenizer = $tokenizer; |
||
55 | $resultsPerTokens = $this->queryTokens(); |
||
56 | |||
57 | $flatResults = $this->flattenResults($resultsPerTokens); |
||
58 | $flatResults = $this->applyQueryCoordination($flatResults); |
||
59 | usort($flatResults, array($this, "scoreCompare")); |
||
60 | |||
61 | $flatResults = array_merge($this->getSearchSuggestions(), $flatResults); |
||
62 | |||
63 | return $flatResults; |
||
64 | } |
||
65 | |||
66 | /** |
||
67 | * Returns the amount of distinct documents |
||
68 | * that are currently in the search index. |
||
69 | * @return int |
||
70 | * @throws \Exception |
||
71 | */ |
||
72 | public function getIndexedDocuments() |
||
73 | { |
||
74 | $db = $this->getSearchDbHandle(); |
||
75 | $sql = ' |
||
76 | SELECT count(DISTINCT documentPath) as indexedDocuments |
||
77 | FROM term_frequency |
||
78 | '; |
||
79 | if (!$stmt = $db->query($sql)) { |
||
80 | $errorInfo = $db->errorInfo(); |
||
81 | $errorMsg = $errorInfo[2]; |
||
82 | throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>'); |
||
83 | } |
||
84 | $result = $stmt->fetch(\PDO::FETCH_COLUMN); |
||
85 | View Code Duplication | if (false === $result) { |
|
|
|||
86 | $errorInfo = $db->errorInfo(); |
||
87 | $errorMsg = $errorInfo[2]; |
||
88 | throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>'); |
||
89 | } |
||
90 | return intval($result); |
||
91 | } |
||
92 | |||
93 | /** |
||
94 | * Queries each token present in the Tokenizer |
||
95 | * and returns SearchResult objects for the found |
||
96 | * documents |
||
97 | * @return array |
||
98 | */ |
||
99 | private function queryTokens() |
||
100 | { |
||
101 | $tokens = $this->getTokens(); |
||
102 | |||
103 | $queryNorm = $this->getQueryNorm($tokens); |
||
104 | $results = array(); |
||
105 | foreach ($tokens as $token) { |
||
106 | $results[$token] = $this->getResultsForToken($token, $queryNorm); |
||
107 | } |
||
108 | return $results; |
||
109 | } |
||
110 | |||
111 | /** |
||
112 | * Applies the Filter objects in the filter array to the |
||
113 | * tokens in the Tokenizer |
||
114 | * @param $tokens |
||
115 | * |
||
116 | * @return mixed |
||
117 | */ |
||
118 | View Code Duplication | protected function applyFilters($tokens) |
|
119 | { |
||
120 | foreach ($this->filters as $filterName) { |
||
121 | $filterClassName = '\library\search\filters\\' . $filterName; |
||
122 | $filter = new $filterClassName($tokens); |
||
123 | $tokens = $filter->getFilterResults(); |
||
124 | } |
||
125 | return $tokens; |
||
126 | } |
||
127 | |||
128 | /** |
||
129 | * Queries the search index for a given token |
||
130 | * and the query norm. |
||
131 | * @param $token |
||
132 | * @param $queryNorm |
||
133 | * |
||
134 | * @return array |
||
135 | * @throws \Exception |
||
136 | */ |
||
137 | public function getResultsForToken($token, $queryNorm) { |
||
138 | $db = $this->getSearchDbHandle(); |
||
139 | $sql = ' |
||
140 | SELECT (:queryNorm * |
||
141 | (SUM(term_frequency.frequency) --TF |
||
142 | * inverse_document_frequency.inverseDocumentFrequency -- IDF |
||
143 | * SUM(term_frequency.termNorm) -- norm |
||
144 | ) |
||
145 | )as score, |
||
146 | SUM(term_frequency.frequency) as TF, |
||
147 | inverse_document_frequency.inverseDocumentFrequency as IDF, |
||
148 | SUM(term_frequency.termNorm) as norm, |
||
149 | term_frequency.documentPath |
||
150 | FROM term_frequency |
||
151 | LEFT JOIN inverse_document_frequency |
||
152 | ON inverse_document_frequency.term = term_frequency.term |
||
153 | WHERE term_frequency.term = :query |
||
154 | GROUP BY term_frequency.documentPath, term_frequency.term |
||
155 | ORDER BY score DESC |
||
156 | '; |
||
157 | if(!$stmt = $db->prepare($sql)) { |
||
158 | throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
||
159 | } |
||
160 | $stmt->bindValue(':query', $token); |
||
161 | $stmt->bindValue(':queryNorm', $queryNorm); |
||
162 | View Code Duplication | if (!$stmt->execute()) { |
|
163 | throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
||
164 | } |
||
165 | return $stmt->fetchAll(\PDO::FETCH_CLASS, '\library\search\results\SearchResult'); |
||
166 | } |
||
167 | |||
168 | /** |
||
169 | * @param $resultsPerTokens |
||
170 | * |
||
171 | * @return array |
||
172 | */ |
||
173 | private function flattenResults($resultsPerTokens) |
||
193 | |||
194 | private function scoreCompare($a, $b) { |
||
200 | |||
201 | /** |
||
202 | * Calculates the query norm for all tokens in the Tokenizer |
||
203 | * @param $tokens |
||
204 | * |
||
205 | * @return int |
||
206 | * @throws \Exception |
||
207 | */ |
||
208 | private function getQueryNorm($tokens) |
||
230 | |||
231 | /** |
||
232 | * Applies query coordination to all results |
||
233 | * @param $flatResults |
||
234 | * |
||
235 | * @return mixed |
||
236 | */ |
||
237 | private function applyQueryCoordination($flatResults) |
||
249 | |||
250 | /** |
||
251 | * Uses the levenshtein algorithm to determine the term that is |
||
252 | * closest to the token that was input for the search |
||
253 | * @return array |
||
254 | * @throws \Exception |
||
255 | */ |
||
256 | private function getSearchSuggestions() |
||
286 | |||
287 | /** |
||
288 | * Retrieves all tokens from the tokenizer |
||
289 | * @return array |
||
290 | */ |
||
291 | private function getTokens() |
||
304 | } |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.