com.strider.datadefender.ModelDiscoveryConfig   A
last analyzed

Complexity

Total Complexity 2

Size/Duplication

Total Lines 45
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 3
Bugs 0 Features 0
Metric Value
eloc 32
c 3
b 0
f 0
dl 0
loc 45
ccs 0
cts 6
cp 0
rs 10
wmc 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A setModels(List) 0 13 2
1
/*
2
 * Copyright 2014-2020, Armenak Grigoryan, and individual contributors as indicated
3
 * by the @authors tag. See the copyright.txt in the distribution for a
4
 * full listing of individual contributors.
5
 *
6
 * This is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2.1 of
9
 * the License, or (at your option) any later version.
10
 *
11
 * This software is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
 * Lesser General Public License for more details.
15
 */
16
package com.strider.datadefender;
17
18
import com.strider.datadefender.discoverer.Discoverer;
19
import java.io.File;
20
import java.util.List;
21
import java.util.Optional;
22
23
import picocli.CommandLine.Option;
24
25
import lombok.Getter;
26
import lombok.extern.log4j.Log4j2;
27
28
/**
29
 * Database configuration options for picocli.
30
 * 
31
 * @author Zaahid Bateson
32
 */
33
@Getter
34
@Log4j2
35
public class ModelDiscoveryConfig {
36
37
    @Option(names = { "-l", "--limit" }, description = "Limit discovery to a set number of rows in a table", defaultValue = "1000")
38
    private Integer limit;
39
40
    private List<String> models;
41
42
    @Option(names = { "-e", "--extension" }, description = "Adds a call to an extension method "
43
        + "(e.g. com.strider.datadefender.specialcase.SinDetector.detectSin)")
44
    private List<String> extensions;
45
46
    @Option(names = { "--model-file" }, description = "Adds a custom made opennlp TokenizerME file for data discovery.")
47
    private List<File> fileModels;
48
49
    @Option(names = { "--token-model" }, description = "Override the default built-in token model (English tokens, "
50
        + "en-token.bin) with a custom token file for use by opennlp's TokenizerModel")
51
    private File tokenModel;
52
53
    @Option(names = { "--probability-threshold" }, description = "Minimum NLP match score to return results for", defaultValue = "0.55")
54
    private Double probabilityThreshold;
55
56
    @Option(names = { "--no-score-calculation" }, description = "If set, includes a column score", negatable = true)
57
    private Boolean calculateScore = true;
58
59
    @Option(names = { "--threshold-count" }, description = "Reports if number of rows found are greater than the defined threshold", defaultValue = "6")
60
    private Integer thresholdCount;
61
62
    @Option(names = { "--threshold-high" }, description = "Reports if number of high risk columns found are greater than the defined threshold", defaultValue = "3")
63
    private Integer thresholdHighRisk;
64
65
    @Option(names = { "-m", "--model" }, description = "Adds a built-in configured opennlp TokenizerME model for data discovery. "
66
                + "Available models are: ${AVAILABLE-MODELS}")
67
    public void setModels(List<String> models) {
68
        Optional<String> unmatched = models.stream().filter((m) -> !Discoverer.BUILT_IN_MODELS.containsKey(m)).findFirst();
69
        if (unmatched.isPresent()) {
70
            log.error(
71
                "A built-in model with the name \"{}\" does not exist. Please specify one of: {}",
72
                unmatched.get(),
73
                System.getProperty("AVAILABLE-MODELS")
74
            );
75
            throw new IllegalArgumentException("Unmatched built-in model.");
76
        }
77
        this.models = models;
78
    }
79
}
80