1
|
|
|
/** |
2
|
|
|
* Copyright 2014-2018, Armenak Grigoryan, and individual contributors as indicated |
3
|
|
|
* by the @authors tag. See the copyright.txt in the distribution for a |
4
|
|
|
* full listing of individual contributors. |
5
|
|
|
* |
6
|
|
|
* This is free software; you can redistribute it and/or modify it |
7
|
|
|
* under the terms of the GNU Lesser General Public License as |
8
|
|
|
* published by the Free Software Foundation; either version 2.1 of |
9
|
|
|
* the License, or (at your option) any later version. |
10
|
|
|
* |
11
|
|
|
* This software is distributed in the hope that it will be useful, |
12
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14
|
|
|
* Lesser General Public License for more details. |
15
|
|
|
*/ |
16
|
|
|
package com.strider.datadefender.specialcase; |
17
|
|
|
|
18
|
|
|
import com.strider.datadefender.discoverer.Discoverer.ColumnMatch; |
19
|
|
|
import com.strider.datadefender.discoverer.Probability; |
20
|
|
|
import com.strider.datadefender.database.metadata.TableMetaData.ColumnMetaData; |
21
|
|
|
import com.strider.datadefender.extensions.BiographicFunctions; |
22
|
|
|
import com.strider.datadefender.file.metadata.FileMatchMetaData; |
23
|
|
|
import java.util.ArrayList; |
24
|
|
|
|
25
|
|
|
import java.util.List; |
26
|
|
|
import java.util.Objects; |
27
|
|
|
import lombok.extern.log4j.Log4j2; |
28
|
|
|
|
29
|
|
|
import org.apache.commons.lang3.StringUtils; |
30
|
|
|
|
31
|
|
|
|
32
|
|
|
/** |
33
|
|
|
* @author Armenak Grigoryan |
34
|
|
|
*/ |
35
|
|
|
@Log4j2 |
36
|
|
|
public class SinDetector implements SpecialCase { |
37
|
|
|
|
38
|
|
|
public static ColumnMatch detectSin(final ColumnMetaData data, final String text) { |
39
|
|
|
|
40
|
|
|
String sinValue = text; |
41
|
|
|
|
42
|
|
|
if ( |
43
|
|
|
StringUtils.isNotBlank(sinValue) |
44
|
|
|
&& ( |
45
|
|
|
Objects.equals(String.class, data.getColumnType()) |
46
|
|
|
|| Number.class.isAssignableFrom(data.getColumnType()) |
47
|
|
|
) |
48
|
|
|
) { |
49
|
|
|
if (Objects.equals(String.class, data.getColumnType())) { |
50
|
|
|
sinValue = sinValue.replaceAll("\\D+", ""); |
51
|
|
|
} |
52
|
|
|
|
53
|
|
|
if (isValidSIN(sinValue)) { |
54
|
|
|
log.info("SIN detected: " + sinValue + " in " + data.getTable().getTableName() + "." + data.getColumnName()); |
55
|
|
|
return new ColumnMatch( |
56
|
|
|
data, |
57
|
|
|
1, |
58
|
|
|
"sin", |
59
|
|
|
List.of(new Probability(sinValue, 1.00)) |
60
|
|
|
); |
61
|
|
|
} |
62
|
|
|
} |
63
|
|
|
|
64
|
|
|
return null; |
65
|
|
|
} |
66
|
|
|
|
67
|
|
|
public static FileMatchMetaData detectSin(final FileMatchMetaData metaData, final String text) { |
68
|
|
|
String sinValue = ""; |
69
|
|
|
|
70
|
|
|
if (StringUtils.isNotBlank(text)) { |
71
|
|
|
sinValue = text; |
72
|
|
|
} |
73
|
|
|
|
74
|
|
|
log.debug("Trying to find SIN in file " + metaData.getFileName() + " : " + sinValue); |
75
|
|
|
final BiographicFunctions bf = new BiographicFunctions(); |
76
|
|
|
if (isValidSIN(sinValue)) { |
77
|
|
|
log.info("SIN detected: " + sinValue); |
78
|
|
|
metaData.setAverageProbability(1.0); |
79
|
|
|
metaData.setModel("sin"); |
80
|
|
|
return metaData; |
81
|
|
|
} else { |
82
|
|
|
log.debug("SIN " + sinValue + " is not valid" ); |
83
|
|
|
} |
84
|
|
|
|
85
|
|
|
return null; |
86
|
|
|
} |
87
|
|
|
|
88
|
|
|
/** |
89
|
|
|
* Algorithm is taken from https://en.wikipedia.org/wiki/Social_Insurance_Number |
90
|
|
|
* @param sin |
91
|
|
|
* @return boolean true, if SIN is valid, otherwise false |
92
|
|
|
*/ |
93
|
|
|
private static boolean isValidSIN(final String sinNumber) { |
94
|
|
|
String sin = sinNumber; |
95
|
|
|
|
96
|
|
|
if (sin != null) { |
97
|
|
|
sin = sin.replaceAll(" ", "").replace("-", "").replace(".", ""); |
98
|
|
|
} |
99
|
|
|
|
100
|
|
|
if ((sin.length() != 9)) { |
101
|
|
|
log.debug("SIN length is != 9"); |
102
|
|
|
return false; |
103
|
|
|
} |
104
|
|
|
|
105
|
|
|
if (!sin.matches("[0-9]+")) { |
106
|
|
|
log.debug("SIN " + sin + " is not number"); |
107
|
|
|
return false; |
108
|
|
|
} |
109
|
|
|
|
110
|
|
|
if (sin.startsWith("0")) { |
111
|
|
|
log.debug("SIN " + sin + " starts with zero and it is not valid"); |
112
|
|
|
return false; |
113
|
|
|
} |
114
|
|
|
|
115
|
|
|
final int[] sinArray = new int[sin.length()]; |
116
|
|
|
final int[] checkArray = { |
117
|
|
|
1, 2, 1, 2, 1, 2, 1, 2, 1 |
118
|
|
|
}; |
119
|
|
|
final List<Integer> sinList = new ArrayList(); |
120
|
|
|
for (int i = 0; i < 9; i++) { |
121
|
|
|
sinArray[i] = Integer.valueOf(sin.substring(i, i + 1)); |
122
|
|
|
sinArray[i] = sinArray[i] * checkArray[i]; |
123
|
|
|
} |
124
|
|
|
|
125
|
|
|
int sum = 0; |
126
|
|
|
|
127
|
|
|
for (int i = 0; i < 9; i++) { |
128
|
|
|
final String tmp = String.valueOf(sinArray[i]); |
129
|
|
|
|
130
|
|
|
if (tmp.length() == 1) { |
131
|
|
|
sinList.add(Integer.valueOf(tmp)); |
132
|
|
|
sum += Integer.valueOf(tmp); |
133
|
|
|
} else { |
134
|
|
|
sinList.add(Integer.valueOf(tmp.substring(0, 1))); |
135
|
|
|
sum += Integer.valueOf(tmp.substring(0, 1)); |
136
|
|
|
sinList.add(Integer.valueOf(tmp.substring(1, 2))); |
137
|
|
|
sum += Integer.valueOf(tmp.substring(1, 2)); |
138
|
|
|
} |
139
|
|
|
} |
140
|
|
|
|
141
|
|
|
if ((sum % 10) == 0) { |
142
|
|
|
return true; |
143
|
|
|
} |
144
|
|
|
|
145
|
|
|
return false; |
146
|
|
|
} |
147
|
|
|
} |
148
|
|
|
|