com.strider.datadefender.anonymizer.functions.Table   A
last analyzed

Complexity

Total Complexity 17

Size/Duplication

Total Lines 159
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 75
c 1
b 0
f 0
dl 0
loc 159
ccs 0
cts 59
cp 0
rs 10
wmc 17

5 Methods

Rating   Name   Duplication   Size   Complexity  
A generateStringListFromDb(String,String) 0 18 4
A mappedColumnShuffle(String,String,String,boolean) 0 19 3
A randomColumnValue(String,String,boolean) 0 18 3
A getNextShuffledItemFor(String) 0 14 3
A getPredictableShuffledValueFor(String,String) 0 22 4
1
/*
2
 * Copyright 2014, Armenak Grigoryan, and individual contributors as indicated
3
 * by the @authors tag. See the copyright.txt in the distribution for a
4
 * full listing of individual contributors.
5
 *
6
 * This is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2.1 of
9
 * the License, or (at your option) any later version.
10
 *
11
 * This software is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
 * Lesser General Public License for more details.
15
 */
16
package com.strider.datadefender.anonymizer.functions;
17
18
import com.strider.datadefender.functions.NamedParameter;
19
import com.strider.datadefender.requirement.registry.DatabaseAwareRequirementFunction;
20
21
import java.sql.Connection;
22
import java.sql.ResultSet;
23
import java.sql.SQLException;
24
import java.sql.Statement;
25
import java.util.Collections;
26
import java.util.HashMap;
27
import java.util.Map;
28
import java.util.Iterator;
29
import java.util.ArrayList;
30
import java.util.List;
31
32
import org.apache.commons.lang3.StringUtils;
33
34
import lombok.extern.log4j.Log4j2;
35
36
/**
37
 * Anonymization helper functions based on existing table data (shuffling column values, etc.).
38
 *
39
 * @author Armenak Grigoryan
40
 */
41
@Log4j2
42
public class Table extends DatabaseAwareRequirementFunction {
43
44
    private static final Map<String, List<String>> stringLists = new HashMap<>();
45
    private static final Map<String, Iterator<String>> stringIters = new HashMap<>();
46
    private static final Map<String, Map<String, String>> predictableShuffle = new HashMap<>();
47
48
    /**
49
     * Returns the next shuffled item from the named collection.
50
     *
51
     * @param name
52
     * @return
53
     */
54
    private String getNextShuffledItemFor(final String name) {
55
        if (stringIters.containsKey(name)) {
56
            final Iterator<String> iter = stringIters.get(name);
57
            if (iter.hasNext()) {
58
                return iter.next();
59
            }
60
        }
61
62
        final List<String> list = stringLists.get(name);
63
        Collections.shuffle(list);
64
65
        final Iterator<String> iter = list.iterator();
66
        stringIters.put(name, iter);
67
        return iter.next();
68
    }
69
70
    /**
71
     * Sets up a map, mapping a list of values to a list of shuffled values.
72
     *
73
     * If the value is not mapped, the function guarantees returning the same
74
     * randomized value for a given column value - however it does not guarantee
75
     * that more than one column value do not have the same randomized value.
76
     *
77
     * @param params
78
     * @return
79
     */
80
    private String getPredictableShuffledValueFor(final String name, final String value) {
81
        if (!predictableShuffle.containsKey(name)) {
82
            final List<String> list = stringLists.get(name);
83
            final List<String> shuffled = new ArrayList<>(list);
84
            Collections.shuffle(shuffled);
85
86
            final Map<String, String> smap = new HashMap<>();
87
            final Iterator<String> lit = list.iterator();
88
            final Iterator<String> sit = shuffled.iterator();
89
            while (lit.hasNext()) {
90
                smap.put(lit.next(), sit.next());
91
            }
92
            predictableShuffle.put(name, smap);
93
        }
94
95
        final Map<String, String> map = predictableShuffle.get(name);
96
        if (!map.containsKey(value)) {
97
            final String[] vals = map.values().toArray(new String[map.size()]);
98
            final int index = (int) Math.abs((long) value.hashCode()) % vals.length;
99
            return vals[index];
100
        }
101
        return map.get(value);
102
    }
103
104
    /**
105
     * Creates a string list of values by querying the database.
106
     *
107
     * @param keyName
108
     * @param query
109
     * @throws java.sql.SQLException
110
     */
111
    protected void generateStringListFromDb(final String keyName, final String query) throws SQLException {
112
        if (!stringLists.containsKey(keyName + query.hashCode())) {
113
            log.info("*** reading from database column: " + keyName);
114
            final List<String> values = new ArrayList<>();
115
116
            log.debug("Query:" + query);
117
            Connection con = dbFactory.getConnection();
118
            try (Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(query)) {
119
                while (rs.next()) {
120
                    values.add(rs.getString(1));
121
                }
122
            }
123
124
            if (values.isEmpty()) {
125
                // TODO: throw a meaningful exception here
126
                log.error("!!! Database column " + keyName + " did not return any values");
127
            }
128
            stringLists.put(keyName + query.hashCode(), values);
129
        }
130
    }
131
132
    /**
133
     * Generates a randomized collection of column values and selects and
134
     * returns one.
135
     *
136
     * The function selects distinct, non-null and non-empty values to choose
137
     * from, then shuffles the collection of strings once before returning items
138
     * from it.  Once all strings have been returned, the collection is
139
     * re-shuffled and re-used.
140
     *
141
     * @param table the table name
142
     * @param column the column name
143
     * @param excludeEmpty set to true to exclude empty values
144
     * @return the next item
145
     * @throws SQLException
146
     */
147
    public String randomColumnValue(
148
        @NamedParameter("table") String table,
149
        @NamedParameter("column") String column,
150
        @NamedParameter("excludeEmpty") boolean excludeEmpty
151
    ) throws SQLException {
152
        
153
        final String keyName = table + "." + column;
154
        final StringBuilder sb = new StringBuilder();
155
        sb.append(String.format("SELECT DISTINCT %s FROM %s", column, table));
156
        if (excludeEmpty) {
157
            if (StringUtils.equalsIgnoreCase("oracle", dbFactory.getVendorName())) {
158
                sb.append(String.format(" WHERE %s IS NOT NULL", column, column));
159
            } else {
160
                sb.append(String.format(" WHERE %s IS NOT NULL AND %s <> ''", column, column));
161
            }
162
        }
163
        generateStringListFromDb(keyName, sb.toString());
164
        return getNextShuffledItemFor(keyName + sb.toString().hashCode());
165
    }
166
167
    /**
168
     * Returns a 'predictable' shuffled value based on the passed value which is
169
     * guaranteed to return the same random value for the same column value.
170
     *
171
     * Note that more than one column value may result in having the same
172
     * shuffled value.
173
     *
174
     * @param table
175
     * @param column
176
     * @param value
177
     * @param excludeEmpty
178
     * @return
179
     * @throws SQLException
180
     */
181
    public String mappedColumnShuffle(
182
        @NamedParameter("table") String table,
183
        @NamedParameter("column") String column,
184
        @NamedParameter("value") String value,
185
        @NamedParameter("excludeEmpty") boolean excludeEmpty
186
    ) throws SQLException {
187
        
188
        final String keyName = table + "." + column;
189
        final StringBuilder sb = new StringBuilder();
190
        sb.append(String.format("SELECT DISTINCT %s FROM %s", column, table));
191
        if (excludeEmpty) {
192
            if (StringUtils.equalsIgnoreCase("oracle", dbFactory.getVendorName())) {
193
                sb.append(String.format(" WHERE %s IS NOT NULL", column, column));
194
            } else {
195
                sb.append(String.format(" WHERE %s IS NOT NULL AND %s <> ''", column, column));
196
            }
197
        }
198
        generateStringListFromDb(keyName, sb.toString());
199
        return getPredictableShuffledValueFor(keyName + sb.toString().hashCode(), value);
200
    }
201
}
202