Completed
Branch master (dc3042)
by Zaahid
03:52
created

com.strider.datadefender.anonymizer.DatabaseAnonymizer   F

Complexity

Total Complexity 70

Size/Duplication

Total Lines 447
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 264
c 1
b 0
f 0
dl 0
loc 447
ccs 0
cts 220
cp 0
rs 2.8
wmc 70

8 Methods

Rating   Name   Duplication   Size   Complexity  
A fillColumnNames(Table,Collection) 0 3 2
C anonymizeRow(PreparedStatement,Collection,Collection,ResultSet,TableMetaData) 0 60 8
F getSelectQueryStatement(IDbFactory,Table,Collection,Collection) 0 106 23
A getUpdateQuery(Table,Collection,Collection) 0 23 3
A anonymize() 0 5 2
D anonymizeTable(Table) 0 99 12
F isExcludedColumn(ResultSet,Column) 0 49 17
A getTruncatedColumnValue(Object,String,TableMetaData) 0 8 3

How to fix   Complexity   

Complexity

Complex classes like com.strider.datadefender.anonymizer.DatabaseAnonymizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
/*
2
 * Copyright 2014, Armenak Grigoryan, and individual contributors as indicated
3
 * by the @authors tag. See the copyright.txt in the distribution for a
4
 * full listing of individual contributors.
5
 *
6
 * This is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2.1 of
9
 * the License, or (at your option) any later version.
10
 *
11
 * This software is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
 * Lesser General Public License for more details.
15
 */
16
package com.strider.datadefender.anonymizer;
17
18
import com.strider.datadefender.DataDefenderException;
19
import com.strider.datadefender.DbConfig;
20
import com.strider.datadefender.database.DatabaseException;
21
import com.strider.datadefender.database.IDbFactory;
22
import com.strider.datadefender.database.metadata.TableMetaData;
23
import com.strider.datadefender.database.metadata.TableMetaData.ColumnMetaData;
24
import com.strider.datadefender.requirement.Column;
25
import com.strider.datadefender.requirement.Exclude;
26
import com.strider.datadefender.requirement.Requirement;
27
import com.strider.datadefender.requirement.Table;
28
import com.strider.datadefender.utils.LikeMatcher;
29
30
import java.lang.reflect.InvocationTargetException;
31
import java.sql.Connection;
32
import java.sql.PreparedStatement;
33
import java.sql.ResultSet;
34
import java.sql.SQLException;
35
import java.sql.Types;
36
import java.util.Collection;
37
import java.util.HashMap;
38
import java.util.HashSet;
39
import java.util.LinkedHashSet;
40
import java.util.LinkedList;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Set;
44
45
import org.apache.commons.lang3.StringUtils;
46
import org.apache.commons.collections4.CollectionUtils;
47
48
import lombok.extern.log4j.Log4j2;
49
import lombok.RequiredArgsConstructor;
50
import me.tongfei.progressbar.ProgressBar;
51
52
/**
53
 * Entry point for RDBMS data anonymizer
54
 *
55
 * @author Armenak Grigoryan
56
 */
57
@RequiredArgsConstructor
58
@Log4j2
59
public class DatabaseAnonymizer implements IAnonymizer {
60
61
    final IDbFactory dbFactory;
62
    final DbConfig config;
63
    final int batchSize;
64
    final Requirement requirement;
65
    final List<String> tables;
66
67
    /**
68
     * Adds column names from the table to the passed collection of strings.
69
     *
70
     * @param table
71
     * @param sColumns
72
     */
73
    private void fillColumnNames(final Table table, final Collection<String> sColumns) {
74
        for (final Column column : table.getColumns()) {
75
            sColumns.add(column.getName());
76
        }
77
    }
78
79
    /**
80
     * Creates the UPDATE query for a single row of results.
81
     *
82
     * @param table
83
     * @param columns
84
     * @param keys
85
     * @param updatableKeys
86
     * @return the SQL statement
87
     */
88
    private String getUpdateQuery(final Table table, final Collection<String> updateColumns, final Collection<String> keys) {
89
        final StringBuilder sql = new StringBuilder();
90
        sql.append("UPDATE ").
91
            append(table.getName()).
92
            append(" SET ").
93
            append(StringUtils.join(updateColumns, " = ?, ")).
94
            append(" = ? WHERE ");
95
96
        log.info("keys: " + keys.toString());
97
        int iteration = 0;
98
        final int collectionSize = keys.size();
99
        final StringBuilder whereStmtp = new StringBuilder();
100
        for (final String key: keys) {
101
            ++iteration;
102
            whereStmtp.append(key).append(" = ? ");
103
            if (collectionSize > iteration) {
104
                whereStmtp.append(" AND ");
105
            }
106
        }
107
        sql.append(whereStmtp);
108
109
        log.debug("getUpdateQuery: " + sql.toString());
110
        return sql.toString();
111
    }
112
113
    /**
114
     * Creates the SELECT query for key and update columns.
115
     *
116
     * @param tableName
117
     * @param keys
118
     * @param columns
119
     * @return
120
     */
121
    private PreparedStatement getSelectQueryStatement(final IDbFactory dbFactory, final Table table, final Collection<String> keys, final Collection<String> columns) throws SQLException {
122
123
        final List<String> params = new LinkedList<>();
124
        // final StringBuilder query = new StringBuilder("SELECT DISTINCT ");
125
        final StringBuilder query = new StringBuilder("SELECT ");
126
        if (CollectionUtils.isNotEmpty(keys)) {
127
            query.append(StringUtils.join(keys, ", ")).append(", ");
128
        }
129
        query
130
            .append(StringUtils.join(columns, ", "))
131
            .append(" FROM ")
132
            .append(table.getName());
133
134
        if (!StringUtils.isBlank(table.getWhere())) {
135
            query.append(" WHERE (").append(table.getWhere());
136
        }
137
138
        final List<Exclude> exclusions = table.getExclusions();
139
        if (exclusions != null) {
140
            String separator = " WHERE (";
141
            if (query.indexOf(" WHERE (") != -1) {
142
                separator = ") AND (";
143
            }
144
            for (final Exclude exc : exclusions) {
145
                final String eq = exc.getEquals();
146
                final String lk = exc.getLike();
147
                final List<String> in = exc.getExcludeInList();
148
                final boolean nl = exc.isExcludeNull();
149
                final String col = exc.getName();
150
151
                if (col != null && col.length() != 0) {
152
                    if (eq != null) {
153
                        query.append(separator).append('(').append(col).append(" != ? OR ").append(col).append(" IS NULL)");
154
                        params.add(eq);
155
                        separator = " AND ";
156
                    }
157
                    if (lk != null && lk.length() != 0) {
158
                        query.append(separator).append('(').append(col).append(" NOT LIKE ? OR ").append(col).append(" IS NULL)");
159
                        params.add(lk);
160
                        separator = " AND ";
161
                    }
162
                    if (CollectionUtils.isNotEmpty(in)) {
163
                        String qs = "?" + StringUtils.repeat(", ?", in.size() - 1);
164
                        query.append(separator).append('(').append(col).append(" NOT IN (")
165
                            .append(qs).append(") OR ").append(col).append(" IS NULL)");
166
                        params.addAll(in);
167
                        separator = " AND ";
168
                    }
169
                    if (nl) {
170
                        query.append(separator).append(col).append(" IS NOT NULL");
171
                        separator = " AND ";
172
                    }
173
                }
174
            }
175
176
            if (query.indexOf(" WHERE (") != -1) {
177
                separator = ") AND (";
178
            }
179
180
            for (final Exclude exc : exclusions) {
181
                final String neq = exc.getNotEquals();
182
                final String nlk = exc.getNotLike();
183
                final List<String> nin = exc.getExcludeNotInList();
184
                final String col = exc.getName();
185
186
                if (neq != null) {
187
                    query.append(separator).append(col).append(" = ?");
188
                    separator = " OR ";
189
                }
190
                if (nlk != null && nlk.length() != 0) {
191
                    query.append(separator).append(col).append(" LIKE ?");
192
                    separator = " OR ";
193
                }
194
                if (CollectionUtils.isNotEmpty(nin)) {
195
                    String qs = "?" + StringUtils.repeat(", ?", nin.size() - 1);
196
                    query.append(separator).append(col).append(" IN (").append(qs).append(")");
197
                    params.addAll(nin);
198
                    separator = " OR ";
199
                }
200
            }
201
        }
202
        if (query.indexOf(" WHERE (") != -1) {
203
            query.append(')');
204
        }
205
206
        final PreparedStatement stmt = dbFactory.getConnection().prepareStatement(
207
                query.toString(),
208
                ResultSet.TYPE_FORWARD_ONLY,
209
                ResultSet.CONCUR_UPDATABLE
210
        );
211
        if (dbFactory.getVendorName().equalsIgnoreCase("mysql")) {
212
            stmt.setFetchSize(Integer.MIN_VALUE);
213
        }
214
215
        int paramIndex = 1;
216
        for (final String param : params) {
217
            stmt.setString(paramIndex, param);
218
            ++paramIndex;
219
        }
220
221
        log.debug("Querying for: " + query.toString());
222
        if (params.size() > 0) {
223
            log.debug("\t - with parameters: " + StringUtils.join(params, ','));
224
        }
225
226
        return stmt;
227
    }
228
229
    /**
230
     * Returns true if the current column's value is excluded by the rulesets
231
     * defined by the Requirements.
232
     *
233
     * @param db
234
     * @param row
235
     * @param column
236
     * @return the columns value
237
     * @throws SQLException
238
     */
239
    private boolean isExcludedColumn(final ResultSet row, final Column column) throws SQLException {
240
241
        final String columnName = column.getName();
242
243
        final List<Exclude> exclusions = column.getExclusions();
244
        boolean hasInclusions = false;
245
        boolean passedInclusion = false;
246
247
        if (exclusions != null) {
248
            for (final Exclude exc : exclusions) {
249
                String name = exc.getName();
250
                final String eq = exc.getEquals();
251
                final String lk = exc.getLike();
252
                final String neq = exc.getNotEquals();
253
                final String nlk = exc.getNotLike();
254
                final boolean nl = exc.isExcludeNull();
255
                if (name == null || name.length() == 0) {
256
                    name = columnName;
257
                }
258
                final String testValue = row.getString(name);
259
260
                if (nl && testValue == null) {
261
                    return true;
262
                } else if (eq != null && eq.equals(testValue)) {
263
                    return true;
264
                } else if (lk != null && lk.length() != 0) {
265
                    final LikeMatcher matcher = new LikeMatcher(lk);
266
                    if (matcher.matches(testValue)) {
267
                        return true;
268
                    }
269
                }
270
271
                if (neq != null) {
272
                    hasInclusions = true;
273
                    if (neq.equals(testValue)) {
274
                        passedInclusion = true;
275
                    }
276
                }
277
                if (nlk != null && nlk.length() != 0) {
278
                    hasInclusions = true;
279
                    final LikeMatcher matcher = new LikeMatcher(nlk);
280
                    if (matcher.matches(testValue)) {
281
                        passedInclusion = true;
282
                    }
283
                }
284
            }
285
        }
286
287
        return hasInclusions && !passedInclusion;
288
    }
289
290
    /**
291
     * Returns the passed colValue truncated to the column's size in the table.
292
     *
293
     * @param colValue
294
     * @param colName
295
     * @param columnMetaData
296
     * @return
297
     * @throws SQLException
298
     */
299
    private Object getTruncatedColumnValue(final Object colValue, final String colName, final TableMetaData tableMetaData) throws SQLException {
300
        final ColumnMetaData col = tableMetaData.getColumn(colName);
301
        final int colSize = col.getColumnSize();
302
        final Class clazz = col.getColumnType();
303
        if (clazz.equals(String.class) && colValue.toString().length() > colSize) {
304
            return colValue.toString().substring(0, colSize);
305
        }
306
        return colValue;
307
    }
308
309
    /**
310
     * Anonymizes a row of columns.
311
     *
312
     * Sets query parameters on the passed updateStmt - this includes the key
313
     * values - and calls anonymization functions for the columns.
314
     *
315
     * @param updateStmt
316
     * @param tableColumns
317
     * @param keyNames
318
     * @param db
319
     * @param row
320
     * @param columnMetaData
321
     * @throws SQLException
322
     * @throws NoSuchMethodException
323
     * @throws SecurityException
324
     * @throws IllegalAccessException
325
     * @throws IllegalArgumentException
326
     * @throws InvocationTargetException
327
     */
328
    private void anonymizeRow(
329
        final PreparedStatement updateStmt,
330
        final Collection<Column> tableColumns,
331
        final Collection<String> keyNames,
332
        final ResultSet row,
333
        final TableMetaData tableMetaData
334
    ) throws SQLException,
335
             NoSuchMethodException,
336
             SecurityException,
337
             IllegalAccessException,
338
             IllegalArgumentException,
339
             InvocationTargetException,
340
             DatabaseException,
341
             InstantiationException {
342
343
        int fieldIndex = 0;
344
        final Map<String, Integer> columnIndexes = new HashMap<>(tableColumns.size());
345
        final Set<String> anonymized = new HashSet<>(tableColumns.size());
346
347
        for (final Column column : tableColumns) {
348
            final String columnName = column.getName();
349
            if (anonymized.contains(columnName)) {
350
                continue;
351
            }
352
            if (!columnIndexes.containsKey(columnName)) {
353
                final int columnIndex = ++fieldIndex;
354
                columnIndexes.put(columnName, columnIndex);
355
            }
356
            if (isExcludedColumn(row, column)) {
357
                final String columnValue = row.getString(columnName);
358
                updateStmt.setString(columnIndexes.get(columnName), columnValue);
359
                log.debug("Excluding column: " + columnName + " with value: " + columnValue);
360
                continue;
361
            }
362
363
            log.debug("Invoking column plan for {}", columnName);
364
            anonymized.add(columnName);
365
            final Object colValue = column.invokeFunctionChain(row);
366
            log.debug("colValue = " + colValue);
367
            log.debug("type= " + (colValue != null ? colValue.getClass() : "null"));
368
            if (colValue == null) {
369
                updateStmt.setNull(columnIndexes.get(columnName), Types.NULL);
370
            } else {
371
                updateStmt.setObject(
372
                    columnIndexes.get(columnName),
373
                    getTruncatedColumnValue(
374
                        colValue,
375
                        columnName,
376
                        tableMetaData
377
                    )
378
                );
379
            }
380
        }
381
382
        int whereIndex = fieldIndex;
383
        for (final String key : keyNames) {
384
            updateStmt.setString(++whereIndex, row.getString(key));
385
        }
386
387
        updateStmt.addBatch();
388
    }
389
390
    /**
391
     * Anonymization function for a single table.
392
     *
393
     * Sets up queries, loops over columns and anonymizes columns for the passed
394
     * Table.
395
     *
396
     * @param table
397
     */
398
    private void anonymizeTable(final Table table) throws DatabaseException, InstantiationException {
399
400
        if (StringUtils.isBlank(table.getWhere())) {
401
            log.info("Table [" + table.getName() + "]. Start ...");
402
        } else {
403
            log.info("Table [" + table.getName() + ", where=" + table.getWhere() + "]. Start ...");
404
        }
405
406
        final List<Column> tableColumns = table.getColumns();
407
        // colNames is looked up with contains, and iterated over.  Using LinkedHashSet means
408
        // duplicate column names won't be added to the query, so a check in the column loop
409
        // below was created to ensure a reasonable warning message is logged if that happens.
410
        final Set<String> colNames = new LinkedHashSet<>(tableColumns.size());
411
        // keyNames is only iterated over, so no need for a hash set
412
        final List<String> keyNames = table.getPrimaryKeyColumnNames();
413
414
        fillColumnNames(table, colNames);
415
416
        // required in this scope for 'catch' block
417
        PreparedStatement countQuery = null;
418
        PreparedStatement selectStmt = null;
419
        PreparedStatement updateStmt = null;
420
        ResultSet rs = null;
421
        final Connection updateCon = dbFactory.getUpdateConnection();
422
423
        try {
424
425
            countQuery = getSelectQueryStatement(dbFactory, table, null, List.of("COUNT(*)"));
426
            rs = countQuery.executeQuery();
427
            int total = 0;
428
            if (rs.next()) {
429
                total = rs.getInt(1);
430
            }
431
            rs.close();
432
433
            selectStmt = getSelectQueryStatement(dbFactory, table, keyNames, colNames);
434
            rs = selectStmt.executeQuery();
435
436
            final TableMetaData tableMetaData = dbFactory.fetchMetaData().getMetaDataFor(rs);
437
438
            final String updateString = getUpdateQuery(table, colNames, keyNames);
439
            updateStmt = updateCon.prepareStatement(updateString);
440
441
            int batchCounter = 0;
442
            int rowCount = 0;
443
444
            try (ProgressBar pb = new ProgressBar("Anonymizing table " + table.getName() + "...", total)) {
445
                while (rs.next()) {
446
                    anonymizeRow(updateStmt, tableColumns, keyNames, rs, tableMetaData);
447
                    batchCounter++;
448
                    if (batchCounter == batchSize) {
449
                        updateStmt.executeBatch();
450
                        updateCon.commit();
451
                        batchCounter = 0;
452
                    }
453
                    pb.step();
454
                    rowCount++;
455
                }
456
            }
457
458
            log.debug("Rows processed: " + rowCount);
459
460
            updateStmt.executeBatch();
461
            log.debug("Batch executed");
462
            updateCon.commit();
463
            log.debug("Commit");
464
            selectStmt.close();
465
            updateStmt.close();
466
            rs.close();
467
            log.debug("Closing open resources");
468
469
        } catch (SQLException | NoSuchMethodException | SecurityException | IllegalAccessException |
470
                 IllegalArgumentException | InvocationTargetException | DataDefenderException ex ) {
471
            log.error(ex.toString());
472
            if (ex.getCause() != null) {
473
                log.error(ex.getCause().toString());
474
            }
475
            throw new DatabaseException("Exception anonymizing table", ex);
476
        } finally {
477
            try {
478
                if (countQuery != null) {
479
                    countQuery.close();
480
                }
481
                if (selectStmt != null) {
482
                    selectStmt.close();
483
                }
484
                if (updateStmt != null) {
485
                    updateStmt.close();
486
                }
487
                if (rs != null) {
488
                    rs.close();
489
                }
490
            } catch (SQLException sqlex) {
491
                log.error(sqlex.toString());
492
            }
493
        }
494
495
        log.info("Table " + table.getName() + ". End ...");
496
        log.info("");
497
    }
498
499
    @Override
500
    public void anonymize() throws DataDefenderException, InstantiationException {
501
        log.info("Anonymizing data for project: {} version: {}", requirement.getProject(), requirement.getVersion());
502
        for (final Table reqTable : requirement.getFilteredTables(tables)) {
503
            anonymizeTable(reqTable);
504
        }
505
    }
506
}
507