| Conditions | 27 |
| Total Lines | 178 |
| Code Lines | 113 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 0 |
| CRAP Score | 756 |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex methods like com.strider.datadefender.discoverer.DatabaseDiscoverer.discoverAgainstSingleModel(Model) often do a lot of different things, which usually means the enclosing class does too. To break such a class down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | /* |
||
| 248 | private List<ColumnMatch> discoverAgainstSingleModel(final Model model) |
||
| 249 | throws ParseException, |
||
| 250 | DataDefenderException, |
||
| 251 | IOException, |
||
| 252 | SQLException { |
||
| 253 | |||
| 254 | final IMetaData metaData = factory.fetchMetaData(); |
||
| 255 | final List<TableMetaData> map = metaData.getMetaData(); |
||
| 256 | |||
| 257 | // Start running NLP algorithms for each column and collect percentage |
||
| 258 | matches = new ArrayList<>(); |
||
| 259 | |||
| 260 | ColumnMatch specialCaseData; |
||
| 261 | final List<ColumnMatch> specialCaseDataList = new ArrayList(); |
||
| 262 | List<String> specialCaseFunctions = config.getExtensions(); |
||
| 263 | boolean specialCase = CollectionUtils.isNotEmpty(specialCaseFunctions); |
||
| 264 | |||
| 265 | log.info("Extension list: {}", specialCaseFunctions); |
||
| 266 | |||
| 267 | final ISqlBuilder sqlBuilder = factory.createSQLBuilder(); |
||
| 268 | List<Probability> probabilityList; |
||
| 269 | |||
| 270 | for (final TableMetaData table : map) { |
||
| 271 | |||
| 272 | final String tableName = table.getTableName(); |
||
| 273 | final String prefixed = sqlBuilder.prefixSchema(tableName); |
||
| 274 | final String cntQuery = "SELECT COUNT(*) FROM " + prefixed; |
||
| 275 | |||
| 276 | int numRows = config.getLimit(); |
||
| 277 | try ( |
||
| 278 | Statement stmt = factory.getConnection().createStatement(); |
||
| 279 | ResultSet rs = stmt.executeQuery(cntQuery) |
||
| 280 | ) { |
||
| 281 | rs.next(); |
||
| 282 | numRows = Math.min(numRows, rs.getInt(1)); |
||
| 283 | } catch (SQLException e) { |
||
| 284 | } |
||
| 285 | |||
| 286 | List<ColumnMetaData> cols = table.getColumns().stream() |
||
| 287 | .filter((c) -> !c.isForeignKey() && !c.isPrimaryKey()) |
||
| 288 | .collect(Collectors.toList()); |
||
| 289 | |||
| 290 | int numSteps = numRows * cols.size(); |
||
| 291 | try (ProgressBar pb = new ProgressBar(model.getName() + " in " + tableName, numSteps)) { |
||
| 292 | for (final ColumnMetaData data : cols) { |
||
| 293 | |||
| 294 | final String columnName = data.getColumnName(); |
||
| 295 | pb.setExtraMessage(columnName); |
||
| 296 | |||
| 297 | log.debug("Column type: [" + data.getColumnType() + "]"); |
||
| 298 | probabilityList = new ArrayList<>(); |
||
| 299 | log.info("Analyzing column [" + tableName + "].[" + columnName + "]"); |
||
| 300 | |||
| 301 | final String query = sqlBuilder.buildSelectWithLimit( |
||
| 302 | "SELECT " + columnName + " FROM " + prefixed + " WHERE " |
||
| 303 | + columnName + " IS NOT NULL", |
||
| 304 | config.getLimit() |
||
| 305 | ); |
||
| 306 | |||
| 307 | log.debug("Executing query against database: " + query); |
||
| 308 | try ( |
||
| 309 | Statement stmt = factory.getConnection().createStatement(); |
||
| 310 | ResultSet resultSet = stmt.executeQuery(query) |
||
| 311 | ) { |
||
| 312 | while (resultSet.next()) { |
||
| 313 | pb.step(); |
||
| 314 | if (Objects.equals(Blob.class, data.getColumnType())) { |
||
| 315 | continue; |
||
| 316 | } |
||
| 317 | if (model.getName().equals("location") && ClassUtils.isAssignable(data.getColumnType(), Number.class)) { |
||
| 318 | continue; |
||
| 319 | } |
||
| 320 | |||
| 321 | String sentence = ""; |
||
| 322 | if (Objects.equals(Clob.class, data.getColumnType())) { |
||
| 323 | Clob clob = resultSet.getClob(1); |
||
| 324 | InputStream is = clob.getAsciiStream(); |
||
| 325 | sentence = IOUtils.toString(is, StandardCharsets.UTF_8.name()); |
||
| 326 | } else { |
||
| 327 | sentence = resultSet.getString(1); |
||
| 328 | } |
||
| 329 | log.debug(sentence); |
||
| 330 | if (specialCaseFunctions != null && specialCase) { |
||
| 331 | try { |
||
| 332 | for (String specialCaseFunction : specialCaseFunctions) { |
||
| 333 | if ((sentence != null) && !sentence.isEmpty()) { |
||
| 334 | log.debug("sentence: " + sentence); |
||
| 335 | log.debug("data: " + data); |
||
| 336 | specialCaseData = (ColumnMatch) callExtension(specialCaseFunction, data, sentence); |
||
| 337 | if (specialCaseData != null) { |
||
| 338 | if (!specialCaseDataList.contains(specialCaseData)) { |
||
| 339 | log.debug("Adding new special case data: " + specialCaseData.toString()); |
||
| 340 | specialCaseDataList.add(specialCaseData); |
||
| 341 | } |
||
| 342 | } else { |
||
| 343 | log.debug("No special case data found"); |
||
| 344 | } |
||
| 345 | } |
||
| 346 | } |
||
| 347 | } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) { |
||
| 348 | log.error(e.toString()); |
||
| 349 | } |
||
| 350 | } |
||
| 351 | |||
| 352 | if ((sentence != null) &&!sentence.isEmpty()) { |
||
| 353 | String processingValue; |
||
| 354 | |||
| 355 | if (Objects.equals(Date.class, data.getColumnType()) |
||
| 356 | || Objects.equals(Timestamp.class, data.getColumnType()) |
||
| 357 | || Objects.equals(Time.class, data.getColumnType())) { |
||
| 358 | |||
| 359 | final DateFormat originalFormat = new SimpleDateFormat(sentence, Locale.ENGLISH); |
||
| 360 | final DateFormat targetFormat = new SimpleDateFormat("MMM d, yy", Locale.ENGLISH); |
||
| 361 | final java.util.Date date = originalFormat.parse(sentence); |
||
| 362 | |||
| 363 | processingValue = targetFormat.format(date); |
||
| 364 | } else { |
||
| 365 | processingValue = sentence; |
||
| 366 | } |
||
| 367 | |||
| 368 | // LOG.debug(sentence); |
||
| 369 | // Convert sentence into tokens |
||
| 370 | final String tokens[] = model.getTokenizer().tokenize(processingValue); |
||
| 371 | |||
| 372 | // Find names |
||
| 373 | final Span nameSpans[] = model.getNameFinder().find(tokens); |
||
| 374 | |||
| 375 | // find probabilities for names |
||
| 376 | final double[] spanProbs = model.getNameFinder().probs(nameSpans); |
||
| 377 | |||
| 378 | // Collect top X tokens with highest probability |
||
| 379 | // display names |
||
| 380 | for (int i = 0; i < nameSpans.length; i++) { |
||
| 381 | final String span = nameSpans[i].toString(); |
||
| 382 | |||
| 383 | if (span.length() > 2) { |
||
| 384 | log.debug("Span: " + span); |
||
| 385 | log.debug("Covered text is: " + tokens[nameSpans[i].getStart()]); |
||
| 386 | log.debug("Probability is: " + spanProbs[i]); |
||
| 387 | probabilityList.add(new Probability(tokens[nameSpans[i].getStart()], spanProbs[i])); |
||
| 388 | } |
||
| 389 | } |
||
| 390 | |||
| 391 | // From OpenNLP documentation: |
||
| 392 | // After every document clearAdaptiveData must be called to clear the adaptive data in the feature generators. |
||
| 393 | // Not calling clearAdaptiveData can lead to a sharp drop in the detection rate after a few documents. |
||
| 394 | model.getNameFinder().clearAdaptiveData(); |
||
| 395 | } |
||
| 396 | } |
||
| 397 | } catch (SQLException sqle) { |
||
| 398 | log.error(sqle.toString()); |
||
| 399 | } |
||
| 400 | |||
| 401 | final double averageProbability = calculateAverage(probabilityList); |
||
| 402 | |||
| 403 | if (averageProbability >= config.getProbabilityThreshold()) { |
||
| 404 | matches.add(new ColumnMatch( |
||
| 405 | data, |
||
| 406 | averageProbability, |
||
| 407 | model.getName(), |
||
| 408 | probabilityList) |
||
| 409 | ); |
||
| 410 | } |
||
| 411 | } |
||
| 412 | pb.stepTo(numSteps); |
||
| 413 | } |
||
| 414 | } |
||
| 415 | |||
| 416 | // Special processing |
||
| 417 | if (!specialCaseDataList.isEmpty()) { |
||
| 418 | log.debug("Special case data is processed :" + specialCaseDataList.toString()); |
||
| 419 | |||
| 420 | specialCaseDataList.forEach((specialData) -> { |
||
| 421 | matches.add(specialData); |
||
| 422 | }); |
||
| 423 | } |
||
| 424 | |||
| 425 | return matches; |
||
| 426 | } |
||
| 428 |