Completed
Branch master (31ad94)
by Zaahid
02:42
created

detectSin(ColumnMetaData,String)   A

Complexity

Conditions 5

Size

Total Lines 27
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 30

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 5
eloc 18
c 2
b 0
f 0
dl 0
loc 27
ccs 0
cts 10
cp 0
crap 30
rs 9.0333
1
/**
2
 * Copyright 2014-2018, Armenak Grigoryan, and individual contributors as indicated
3
 * by the @authors tag. See the copyright.txt in the distribution for a
4
 * full listing of individual contributors.
5
 *
6
 * This is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2.1 of
9
 * the License, or (at your option) any later version.
10
 *
11
 * This software is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
 * Lesser General Public License for more details.
15
 */
16
package com.strider.datadefender.specialcase;
17
18
import com.strider.datadefender.discoverer.Discoverer.ColumnMatch;
19
import com.strider.datadefender.discoverer.Probability;
20
import com.strider.datadefender.database.metadata.TableMetaData.ColumnMetaData;
21
import com.strider.datadefender.extensions.BiographicFunctions;
22
import com.strider.datadefender.file.metadata.FileMatchMetaData;
23
import java.util.ArrayList;
24
25
import java.util.List;
26
import java.util.Objects;
27
import lombok.extern.log4j.Log4j2;
28
29
import org.apache.commons.lang3.StringUtils;
30
31
32
/**
33
 * @author Armenak Grigoryan
34
 */
35
@Log4j2
36
public class SinDetector implements SpecialCase {
37
38
    public static ColumnMatch detectSin(final ColumnMetaData data, final String text) {
39
        
40
        String sinValue = text;
41
42
        if (
43
            StringUtils.isNotBlank(sinValue)
44
            && (
45
                Objects.equals(String.class, data.getColumnType())
46
                || Number.class.isAssignableFrom(data.getColumnType())
47
            )
48
        ) {
49
            if (Objects.equals(String.class, data.getColumnType())) {
50
                sinValue = sinValue.replaceAll("\\D+", "");
51
            }
52
53
            if (isValidSIN(sinValue)) {
54
                log.info("SIN detected: " + sinValue + " in " + data.getTable().getTableName() + "." + data.getColumnName());
55
                return new ColumnMatch(
56
                    data,
57
                    1,
58
                    "sin",
59
                    List.of(new Probability(sinValue, 1.00))
60
                );
61
            }
62
        }
63
64
        return null;
65
    }
66
67
    public static FileMatchMetaData detectSin(final FileMatchMetaData metaData, final String text) {
68
        String sinValue = "";
69
70
        if (StringUtils.isNotBlank(text)) {
71
            sinValue = text;
72
        }
73
74
        log.debug("Trying to find SIN in file " + metaData.getFileName() + " : " + sinValue);
75
        final BiographicFunctions bf = new BiographicFunctions();
76
        if (isValidSIN(sinValue)) {
77
                log.info("SIN detected: " + sinValue);
78
                metaData.setAverageProbability(1.0);
79
                metaData.setModel("sin");
80
                return metaData;
81
        } else {
82
            log.debug("SIN " + sinValue + " is not valid" );
83
        }
84
85
        return null;
86
    }
87
    
88
 /**
89
     * Algorithm is taken from https://en.wikipedia.org/wiki/Social_Insurance_Number
90
     * @param sin
91
     * @return boolean true, if SIN is valid, otherwise false
92
     */
93
    private static boolean isValidSIN(final String sinNumber) {
94
        String sin = sinNumber;
95
96
        if (sin != null) {
97
            sin = sin.replaceAll(" ", "").replace("-", "").replace(".", "");
98
        }        
99
        
100
        if ((sin.length() != 9)) {
101
            log.debug("SIN length is != 9");
102
            return false;
103
        }
104
105
        if (!sin.matches("[0-9]+")) {
106
            log.debug("SIN " + sin + " is not number");
107
            return false;
108
        }
109
110
        if (sin.startsWith("0")) {
111
            log.debug("SIN " + sin + " starts with zero and it is not valid");
112
            return false;
113
        }        
114
        
115
        final int[]         sinArray   = new int[sin.length()];
116
        final int[]         checkArray = {
117
            1, 2, 1, 2, 1, 2, 1, 2, 1
118
        };
119
        final List<Integer> sinList    = new ArrayList();
120
        for (int i = 0; i < 9; i++) {
121
            sinArray[i] = Integer.valueOf(sin.substring(i, i + 1));
122
            sinArray[i] = sinArray[i] * checkArray[i];
123
        }
124
125
        int sum = 0;
126
127
        for (int i = 0; i < 9; i++) {
128
            final String tmp = String.valueOf(sinArray[i]);
129
130
            if (tmp.length() == 1) {
131
                sinList.add(Integer.valueOf(tmp));
132
                sum += Integer.valueOf(tmp);
133
            } else {
134
                sinList.add(Integer.valueOf(tmp.substring(0, 1)));
135
                sum += Integer.valueOf(tmp.substring(0, 1));
136
                sinList.add(Integer.valueOf(tmp.substring(1, 2)));
137
                sum += Integer.valueOf(tmp.substring(1, 2));
138
            }
139
        }
140
141
        if ((sum % 10) == 0) {
142
            return true;
143
        }
144
        
145
        return false;
146
    }        
147
}
148