Conditions | 27 |
Total Lines | 178 |
Code Lines | 113 |
Lines | 0 |
Ratio | 0 % |
Tests | 0 |
CRAP Score | 756 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex code such as the method com.strider.datadefender.discoverer.DatabaseDiscoverer.discoverAgainstSingleModel(Model), and the class that contains it, often does a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | /* |
||
248 | private List<ColumnMatch> discoverAgainstSingleModel(final Model model) |
||
249 | throws ParseException, |
||
250 | DataDefenderException, |
||
251 | IOException, |
||
252 | SQLException { |
||
253 | |||
254 | final IMetaData metaData = factory.fetchMetaData(); |
||
255 | final List<TableMetaData> map = metaData.getMetaData(); |
||
256 | |||
257 | // Start running NLP algorithms for each column and collect percentage |
||
258 | matches = new ArrayList<>(); |
||
259 | |||
260 | ColumnMatch specialCaseData; |
||
261 | final List<ColumnMatch> specialCaseDataList = new ArrayList(); |
||
262 | List<String> specialCaseFunctions = config.getExtensions(); |
||
263 | boolean specialCase = CollectionUtils.isNotEmpty(specialCaseFunctions); |
||
264 | |||
265 | log.info("Extension list: {}", specialCaseFunctions); |
||
266 | |||
267 | final ISqlBuilder sqlBuilder = factory.createSQLBuilder(); |
||
268 | List<Probability> probabilityList; |
||
269 | |||
270 | for (final TableMetaData table : map) { |
||
271 | |||
272 | final String tableName = table.getTableName(); |
||
273 | final String prefixed = sqlBuilder.prefixSchema(tableName); |
||
274 | final String cntQuery = "SELECT COUNT(*) FROM " + prefixed; |
||
275 | |||
276 | int numRows = config.getLimit(); |
||
277 | try ( |
||
278 | Statement stmt = factory.getConnection().createStatement(); |
||
279 | ResultSet rs = stmt.executeQuery(cntQuery) |
||
280 | ) { |
||
281 | rs.next(); |
||
282 | numRows = Math.min(numRows, rs.getInt(1)); |
||
283 | } catch (SQLException e) { |
||
284 | } |
||
285 | |||
286 | List<ColumnMetaData> cols = table.getColumns().stream() |
||
287 | .filter((c) -> !c.isForeignKey() && !c.isPrimaryKey()) |
||
288 | .collect(Collectors.toList()); |
||
289 | |||
290 | int numSteps = numRows * cols.size(); |
||
291 | try (ProgressBar pb = new ProgressBar(model.getName() + " in " + tableName, numSteps)) { |
||
292 | for (final ColumnMetaData data : cols) { |
||
293 | |||
294 | final String columnName = data.getColumnName(); |
||
295 | pb.setExtraMessage(columnName); |
||
296 | |||
297 | log.debug("Column type: [" + data.getColumnType() + "]"); |
||
298 | probabilityList = new ArrayList<>(); |
||
299 | log.info("Analyzing column [" + tableName + "].[" + columnName + "]"); |
||
300 | |||
301 | final String query = sqlBuilder.buildSelectWithLimit( |
||
302 | "SELECT " + columnName + " FROM " + prefixed + " WHERE " |
||
303 | + columnName + " IS NOT NULL", |
||
304 | config.getLimit() |
||
305 | ); |
||
306 | |||
307 | log.debug("Executing query against database: " + query); |
||
308 | try ( |
||
309 | Statement stmt = factory.getConnection().createStatement(); |
||
310 | ResultSet resultSet = stmt.executeQuery(query) |
||
311 | ) { |
||
312 | while (resultSet.next()) { |
||
313 | pb.step(); |
||
314 | if (Objects.equals(Blob.class, data.getColumnType())) { |
||
315 | continue; |
||
316 | } |
||
317 | if (model.getName().equals("location") && ClassUtils.isAssignable(data.getColumnType(), Number.class)) { |
||
318 | continue; |
||
319 | } |
||
320 | |||
321 | String sentence = ""; |
||
322 | if (Objects.equals(Clob.class, data.getColumnType())) { |
||
323 | Clob clob = resultSet.getClob(1); |
||
324 | InputStream is = clob.getAsciiStream(); |
||
325 | sentence = IOUtils.toString(is, StandardCharsets.UTF_8.name()); |
||
326 | } else { |
||
327 | sentence = resultSet.getString(1); |
||
328 | } |
||
329 | log.debug(sentence); |
||
330 | if (specialCaseFunctions != null && specialCase) { |
||
331 | try { |
||
332 | for (String specialCaseFunction : specialCaseFunctions) { |
||
333 | if ((sentence != null) && !sentence.isEmpty()) { |
||
334 | log.debug("sentence: " + sentence); |
||
335 | log.debug("data: " + data); |
||
336 | specialCaseData = (ColumnMatch) callExtension(specialCaseFunction, data, sentence); |
||
337 | if (specialCaseData != null) { |
||
338 | if (!specialCaseDataList.contains(specialCaseData)) { |
||
339 | log.debug("Adding new special case data: " + specialCaseData.toString()); |
||
340 | specialCaseDataList.add(specialCaseData); |
||
341 | } |
||
342 | } else { |
||
343 | log.debug("No special case data found"); |
||
344 | } |
||
345 | } |
||
346 | } |
||
347 | } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) { |
||
348 | log.error(e.toString()); |
||
349 | } |
||
350 | } |
||
351 | |||
352 | if ((sentence != null) &&!sentence.isEmpty()) { |
||
353 | String processingValue; |
||
354 | |||
355 | if (Objects.equals(Date.class, data.getColumnType()) |
||
356 | || Objects.equals(Timestamp.class, data.getColumnType()) |
||
357 | || Objects.equals(Time.class, data.getColumnType())) { |
||
358 | |||
359 | final DateFormat originalFormat = new SimpleDateFormat(sentence, Locale.ENGLISH); |
||
360 | final DateFormat targetFormat = new SimpleDateFormat("MMM d, yy", Locale.ENGLISH); |
||
361 | final java.util.Date date = originalFormat.parse(sentence); |
||
362 | |||
363 | processingValue = targetFormat.format(date); |
||
364 | } else { |
||
365 | processingValue = sentence; |
||
366 | } |
||
367 | |||
368 | // LOG.debug(sentence); |
||
369 | // Convert sentence into tokens |
||
370 | final String tokens[] = model.getTokenizer().tokenize(processingValue); |
||
371 | |||
372 | // Find names |
||
373 | final Span nameSpans[] = model.getNameFinder().find(tokens); |
||
374 | |||
375 | // find probabilities for names |
||
376 | final double[] spanProbs = model.getNameFinder().probs(nameSpans); |
||
377 | |||
378 | // Collect top X tokens with highest probability |
||
379 | // display names |
||
380 | for (int i = 0; i < nameSpans.length; i++) { |
||
381 | final String span = nameSpans[i].toString(); |
||
382 | |||
383 | if (span.length() > 2) { |
||
384 | log.debug("Span: " + span); |
||
385 | log.debug("Covered text is: " + tokens[nameSpans[i].getStart()]); |
||
386 | log.debug("Probability is: " + spanProbs[i]); |
||
387 | probabilityList.add(new Probability(tokens[nameSpans[i].getStart()], spanProbs[i])); |
||
388 | } |
||
389 | } |
||
390 | |||
391 | // From OpenNLP documentation: |
||
392 | // After every document clearAdaptiveData must be called to clear the adaptive data in the feature generators. |
||
393 | // Not calling clearAdaptiveData can lead to a sharp drop in the detection rate after a few documents. |
||
394 | model.getNameFinder().clearAdaptiveData(); |
||
395 | } |
||
396 | } |
||
397 | } catch (SQLException sqle) { |
||
398 | log.error(sqle.toString()); |
||
399 | } |
||
400 | |||
401 | final double averageProbability = calculateAverage(probabilityList); |
||
402 | |||
403 | if (averageProbability >= config.getProbabilityThreshold()) { |
||
404 | matches.add(new ColumnMatch( |
||
405 | data, |
||
406 | averageProbability, |
||
407 | model.getName(), |
||
408 | probabilityList) |
||
409 | ); |
||
410 | } |
||
411 | } |
||
412 | pb.stepTo(numSteps); |
||
413 | } |
||
414 | } |
||
415 | |||
416 | // Special processing |
||
417 | if (!specialCaseDataList.isEmpty()) { |
||
418 | log.debug("Special case data is processed :" + specialCaseDataList.toString()); |
||
419 | |||
420 | specialCaseDataList.forEach((specialData) -> { |
||
421 | matches.add(specialData); |
||
422 | }); |
||
423 | } |
||
424 | |||
425 | return matches; |
||
426 | } |
||
428 |