com.strider.datadefender.ModelDiscoveryConfig - Code Metrics - armenak/DataDefender - Measure and Improve Code Quality continuously with Scrutinizer

com.strider.datadefender.ModelDiscoveryConfig A
last analyzed 2021-07-07 23:11 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	45
Duplicated Lines	0 %

Test Coverage

Coverage

Importance

Changes	3
Bugs	0	Features	0

Metric	Value
eloc	32
c	3
b	0
f	0
dl	0
loc	45
ccs	0
cts	6
cp	0
rs	10
wmc	2

1 Method

Rating	Name	Duplication	Size	Complexity
A	setModels(List)	0	13	2

/*
 * Copyright 2014-2020, Armenak Grigoryan, and individual contributors as indicated
 * by the @authors tag. See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 */
package com.strider.datadefender;

import com.strider.datadefender.discoverer.Discoverer;
import java.io.File;
import java.util.List;
import java.util.Optional;

import picocli.CommandLine.Option;

import lombok.Getter;
import lombok.extern.log4j.Log4j2;

/**
 * Model discovery configuration options for picocli.
 *
 * Holds the command-line options that control model-based data discovery:
 * which built-in or file-based opennlp models to use, the row limit, the
 * probability threshold, and the reporting thresholds. Getters for every
 * field are generated by Lombok's {@code @Getter}.
 *
 * @author Zaahid Bateson
 */
@Getter
@Log4j2
public class ModelDiscoveryConfig {

    /** Number of rows in a table that discovery is limited to (default 1000). */
    @Option(names = { "-l", "--limit" }, description = "Limit discovery to a set number of rows in a table", defaultValue = "1000")
    private Integer limit;

    /**
     * Names of the built-in models selected with {@code -m}/{@code --model}.
     * Populated through {@link #setModels(List)}, which validates each name;
     * remains {@code null} until that setter is called.
     */
    private List<String> models;

    /** Extension methods to call, as given on the command line. */
    @Option(names = { "-e", "--extension" }, description = "Adds a call to an extension method "
        + "(e.g. com.strider.datadefender.specialcase.SinDetector.detectSin)")
    private List<String> extensions;

    /** Custom opennlp TokenizerME model files to use for data discovery. */
    @Option(names = { "--model-file" }, description = "Adds a custom made opennlp TokenizerME file for data discovery.")
    private List<File> fileModels;

    /** Custom token file that overrides the default built-in token model. */
    @Option(names = { "--token-model" }, description = "Override the default built-in token model (English tokens, "
        + "en-token.bin) with a custom token file for use by opennlp's TokenizerModel")
    private File tokenModel;

    /** Minimum NLP match score to return results for (default 0.55). */
    @Option(names = { "--probability-threshold" }, description = "Minimum NLP match score to return results for", defaultValue = "0.55")
    private Double probabilityThreshold;

    /**
     * Score-calculation flag. Initialised to {@code true} and declared as a
     * negatable picocli option under its negative name.
     */
    @Option(names = { "--no-score-calculation" }, description = "If set, includes a column score", negatable = true)
    private Boolean calculateScore = true;

    /** Row-count threshold used for reporting (default 6). */
    @Option(names = { "--threshold-count" }, description = "Reports if number of rows found are greater than the defined threshold", defaultValue = "6")
    private Integer thresholdCount;

    /** High-risk column count threshold used for reporting (default 3). */
    @Option(names = { "--threshold-high" }, description = "Reports if number of high risk columns found are greater than the defined threshold", defaultValue = "3")
    private Integer thresholdHighRisk;

    /**
     * Sets the built-in models to use after checking that every name is a key
     * of {@link Discoverer#BUILT_IN_MODELS}.
     *
     * A {@code null} list is accepted and stored as-is, which leaves the option
     * in the same state as when it was never specified, instead of failing
     * with a {@link NullPointerException}.
     *
     * @param models names of the built-in models; may be {@code null}
     * @throws IllegalArgumentException if any name is not a known built-in
     *         model; the first offending name is logged before throwing
     */
    @Option(names = { "-m", "--model" }, description = "Adds a built-in configured opennlp TokenizerME model for data discovery. "
                + "Available models are: ${AVAILABLE-MODELS}")
    public void setModels(List<String> models) {
        // Nothing to validate when no list is supplied.
        if (models != null) {
            Optional<String> unmatched = models.stream()
                .filter(m -> !Discoverer.BUILT_IN_MODELS.containsKey(m))
                .findFirst();
            if (unmatched.isPresent()) {
                // The valid names are read from the AVAILABLE-MODELS system
                // property, the same variable the option description refers to.
                log.error(
                    "A built-in model with the name \"{}\" does not exist. Please specify one of: {}",
                    unmatched.get(),
                    System.getProperty("AVAILABLE-MODELS")
                );
                throw new IllegalArgumentException("Unmatched built-in model.");
            }
        }
        this.models = models;
    }
}


1			/*
2			* Copyright 2014-2020, Armenak Grigoryan, and individual contributors as indicated
3			* by the @authors tag. See the copyright.txt in the distribution for a
4			* full listing of individual contributors.
5			*
6			* This is free software; you can redistribute it and/or modify it
7			* under the terms of the GNU Lesser General Public License as
8			* published by the Free Software Foundation; either version 2.1 of
9			* the License, or (at your option) any later version.
10			*
11			* This software is distributed in the hope that it will be useful,
12			* but WITHOUT ANY WARRANTY; without even the implied warranty of
13			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14			* Lesser General Public License for more details.
15			*/
16			package com.strider.datadefender;
17
18			import com.strider.datadefender.discoverer.Discoverer;
19			import java.io.File;
20			import java.util.List;
21			import java.util.Optional;
22
23			import picocli.CommandLine.Option;
24
25			import lombok.Getter;
26			import lombok.extern.log4j.Log4j2;
27
28			/**
29			* Database configuration options for picocli.
30			*
31			* @author Zaahid Bateson
32			*/
33			@Getter
34			@Log4j2
35			public class ModelDiscoveryConfig {
36
37			@Option(names = { "-l", "--limit" }, description = "Limit discovery to a set number of rows in a table", defaultValue = "1000")
38			private Integer limit;
39
40			private List<String> models;
41
42			@Option(names = { "-e", "--extension" }, description = "Adds a call to an extension method "
43			+ "(e.g. com.strider.datadefender.specialcase.SinDetector.detectSin)")
44			private List<String> extensions;
45
46			@Option(names = { "--model-file" }, description = "Adds a custom made opennlp TokenizerME file for data discovery.")
47			private List<File> fileModels;
48
49			@Option(names = { "--token-model" }, description = "Override the default built-in token model (English tokens, "
50			+ "en-token.bin) with a custom token file for use by opennlp's TokenizerModel")
51			private File tokenModel;
52
53			@Option(names = { "--probability-threshold" }, description = "Minimum NLP match score to return results for", defaultValue = "0.55")
54			private Double probabilityThreshold;
55
56			@Option(names = { "--no-score-calculation" }, description = "If set, includes a column score", negatable = true)
57			private Boolean calculateScore = true;
58
59			@Option(names = { "--threshold-count" }, description = "Reports if number of rows found are greater than the defined threshold", defaultValue = "6")
60			private Integer thresholdCount;
61
62			@Option(names = { "--threshold-high" }, description = "Reports if number of high risk columns found are greater than the defined threshold", defaultValue = "3")
63			private Integer thresholdHighRisk;
64
65			@Option(names = { "-m", "--model" }, description = "Adds a built-in configured opennlp TokenizerME model for data discovery. "
66			+ "Available models are: ${AVAILABLE-MODELS}")
67			public void setModels(List<String> models) {
68			Optional<String> unmatched = models.stream().filter((m) -> !Discoverer.BUILT_IN_MODELS.containsKey(m)).findFirst();
69			if (unmatched.isPresent()) {
70			log.error(
71			"A built-in model with the name \"{}\" does not exist. Please specify one of: {}",
72			unmatched.get(),
73			System.getProperty("AVAILABLE-MODELS")
74			);
75			throw new IllegalArgumentException("Unmatched built-in model.");
76			}
77			this.models = models;
78			}
79			}
80

armenak / DataDefender

com.strider.datadefender.ModelDiscoveryConfig A last analyzed 2021-07-07 23:11 UTC

Complexity

Size/Duplication

Test Coverage

Importance

1 Method

Duplication Side-by-Side

Filter issues like

com.strider.datadefender.ModelDiscoveryConfig A
last analyzed 2021-07-07 23:11 UTC