com.strider.datadefender.anonymizer.DatabaseAnonymizer   F
last analyzed

Complexity

Total Complexity 70

Size/Duplication

Total Lines 450
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 265
dl 0
loc 450
ccs 0
cts 220
cp 0
rs 2.8
c 1
b 0
f 0
wmc 70

8 Methods

Rating   Name   Duplication   Size   Complexity  
A fillColumnNames(Table,Collection) 0 3 2
F getSelectQueryStatement(IDbFactory,Table,Collection,Collection) 0 106 23
A getUpdateQuery(Table,Collection,Collection) 0 23 3
F isExcludedColumn(ResultSet,Column) 0 49 17
A getTruncatedColumnValue(Object,String,TableMetaData) 0 8 3
C anonymizeRow(PreparedStatement,Collection,Collection,ResultSet,TableMetaData) 0 63 8
A anonymize() 0 5 2
D anonymizeTable(Table) 0 99 12

How to fix   Complexity   

Complexity

Complex classes like com.strider.datadefender.anonymizer.DatabaseAnonymizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
/*
2
 * Copyright 2014, Armenak Grigoryan, and individual contributors as indicated
3
 * by the @authors tag. See the copyright.txt in the distribution for a
4
 * full listing of individual contributors.
5
 *
6
 * This is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2.1 of
9
 * the License, or (at your option) any later version.
10
 *
11
 * This software is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
 * Lesser General Public License for more details.
15
 */
16
package com.strider.datadefender.anonymizer;
17
18
import com.strider.datadefender.DataDefenderException;
19
import com.strider.datadefender.DbConfig;
20
import com.strider.datadefender.database.DatabaseException;
21
import com.strider.datadefender.database.IDbFactory;
22
import com.strider.datadefender.database.metadata.TableMetaData;
23
import com.strider.datadefender.database.metadata.TableMetaData.ColumnMetaData;
24
import com.strider.datadefender.requirement.Column;
25
import com.strider.datadefender.requirement.Exclude;
26
import com.strider.datadefender.requirement.Requirement;
27
import com.strider.datadefender.requirement.Table;
28
import com.strider.datadefender.utils.LikeMatcher;
29
30
import java.lang.reflect.InvocationTargetException;
31
import java.sql.Connection;
32
import java.sql.PreparedStatement;
33
import java.sql.ResultSet;
34
import java.sql.SQLException;
35
import java.sql.Types;
36
import java.util.Collection;
37
import java.util.HashMap;
38
import java.util.HashSet;
39
import java.util.LinkedHashSet;
40
import java.util.LinkedList;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Set;
44
45
import org.apache.commons.lang3.StringUtils;
46
import org.apache.commons.collections4.CollectionUtils;
47
48
import lombok.extern.log4j.Log4j2;
49
import lombok.RequiredArgsConstructor;
50
import me.tongfei.progressbar.ProgressBar;
51
52
/**
53
 * Entry point for RDBMS data anonymizer
54
 *
55
 * @author Armenak Grigoryan
56
 */
57
@RequiredArgsConstructor
58
@Log4j2
59
public class DatabaseAnonymizer implements IAnonymizer {
60
61
    final IDbFactory dbFactory;
62
    final DbConfig config;
63
    final int batchSize;
64
    final Requirement requirement;
65
    final List<String> tables;
66
67
    /**
68
     * Adds column names from the table to the passed collection of strings.
69
     *
70
     * @param table
71
     * @param sColumns
72
     */
73
    private void fillColumnNames(final Table table, final Collection<String> sColumns) {
74
        for (final Column column : table.getColumns()) {
75
            sColumns.add(column.getName());
76
        }
77
    }
78
79
    /**
80
     * Creates the UPDATE query for a single row of results.
81
     *
82
     * @param table
83
     * @param columns
84
     * @param keys
85
     * @param updatableKeys
86
     * @return the SQL statement
87
     */
88
    private String getUpdateQuery(final Table table, final Collection<String> updateColumns, final Collection<String> keys) {
89
        final StringBuilder sql = new StringBuilder();
90
        sql.append("UPDATE ").
91
            append(table.getName()).
92
            append(" SET ").
93
            append(StringUtils.join(updateColumns, " = ?, ")).
94
            append(" = ? WHERE ");
95
96
        log.info("keys: " + keys.toString());
97
        int iteration = 0;
98
        final int collectionSize = keys.size();
99
        final StringBuilder whereStmtp = new StringBuilder();
100
        for (final String key: keys) {
101
            ++iteration;
102
            whereStmtp.append(key).append(" = ? ");
103
            if (collectionSize > iteration) {
104
                whereStmtp.append(" AND ");
105
            }
106
        }
107
        sql.append(whereStmtp);
108
109
        log.debug("getUpdateQuery: " + sql.toString());
110
        return sql.toString();
111
    }
112
113
    /**
114
     * Creates the SELECT query for key and update columns.
115
     *
116
     * @param tableName
117
     * @param keys
118
     * @param columns
119
     * @return
120
     */
121
    private PreparedStatement getSelectQueryStatement(final IDbFactory dbFactory, final Table table, final Collection<String> keys, final Collection<String> columns) throws SQLException {
122
123
        final List<String> params = new LinkedList<>();
124
        // final StringBuilder query = new StringBuilder("SELECT DISTINCT ");
125
        final StringBuilder query = new StringBuilder("SELECT ");
126
        if (CollectionUtils.isNotEmpty(keys)) {
127
            query.append(StringUtils.join(keys, ", ")).append(", ");
128
        }
129
        query
130
            .append(StringUtils.join(columns, ", "))
131
            .append(" FROM ")
132
            .append(table.getName());
133
134
        if (!StringUtils.isBlank(table.getWhere())) {
135
            query.append(" WHERE (").append(table.getWhere());
136
        }
137
138
        final List<Exclude> exclusions = table.getExclusions();
139
        if (exclusions != null) {
140
            String separator = " WHERE (";
141
            if (query.indexOf(" WHERE (") != -1) {
142
                separator = ") AND (";
143
            }
144
            for (final Exclude exc : exclusions) {
145
                final String eq = exc.getEquals();
146
                final String lk = exc.getLike();
147
                final List<String> in = exc.getExcludeInList();
148
                final boolean nl = exc.isExcludeNull();
149
                final String col = exc.getName();
150
151
                if (col != null && col.length() != 0) {
152
                    if (eq != null) {
153
                        query.append(separator).append('(').append(col).append(" != ? OR ").append(col).append(" IS NULL)");
154
                        params.add(eq);
155
                        separator = " AND ";
156
                    }
157
                    if (lk != null && lk.length() != 0) {
158
                        query.append(separator).append('(').append(col).append(" NOT LIKE ? OR ").append(col).append(" IS NULL)");
159
                        params.add(lk);
160
                        separator = " AND ";
161
                    }
162
                    if (CollectionUtils.isNotEmpty(in)) {
163
                        String qs = "?" + StringUtils.repeat(", ?", in.size() - 1);
164
                        query.append(separator).append('(').append(col).append(" NOT IN (")
165
                            .append(qs).append(") OR ").append(col).append(" IS NULL)");
166
                        params.addAll(in);
167
                        separator = " AND ";
168
                    }
169
                    if (nl) {
170
                        query.append(separator).append(col).append(" IS NOT NULL");
171
                        separator = " AND ";
172
                    }
173
                }
174
            }
175
176
            if (query.indexOf(" WHERE (") != -1) {
177
                separator = ") AND (";
178
            }
179
180
            for (final Exclude exc : exclusions) {
181
                final String neq = exc.getNotEquals();
182
                final String nlk = exc.getNotLike();
183
                final List<String> nin = exc.getExcludeNotInList();
184
                final String col = exc.getName();
185
186
                if (neq != null) {
187
                    query.append(separator).append(col).append(" = ?");
188
                    separator = " OR ";
189
                }
190
                if (nlk != null && nlk.length() != 0) {
191
                    query.append(separator).append(col).append(" LIKE ?");
192
                    separator = " OR ";
193
                }
194
                if (CollectionUtils.isNotEmpty(nin)) {
195
                    String qs = "?" + StringUtils.repeat(", ?", nin.size() - 1);
196
                    query.append(separator).append(col).append(" IN (").append(qs).append(")");
197
                    params.addAll(nin);
198
                    separator = " OR ";
199
                }
200
            }
201
        }
202
        if (query.indexOf(" WHERE (") != -1) {
203
            query.append(')');
204
        }
205
206
        final PreparedStatement stmt = dbFactory.getConnection().prepareStatement(
207
                query.toString(),
208
                ResultSet.TYPE_FORWARD_ONLY,
209
                ResultSet.CONCUR_UPDATABLE
210
        );
211
        if (dbFactory.getVendorName().equalsIgnoreCase("mysql")) {
212
            stmt.setFetchSize(Integer.MIN_VALUE);
213
        }
214
215
        int paramIndex = 1;
216
        for (final String param : params) {
217
            stmt.setString(paramIndex, param);
218
            ++paramIndex;
219
        }
220
221
        log.debug("Querying for: " + query.toString());
222
        if (params.size() > 0) {
223
            log.debug("\t - with parameters: " + StringUtils.join(params, ','));
224
        }
225
226
        return stmt;
227
    }
228
229
    /**
230
     * Returns true if the current column's value is excluded by the rulesets
231
     * defined by the Requirements.
232
     *
233
     * @param db
234
     * @param row
235
     * @param column
236
     * @return the columns value
237
     * @throws SQLException
238
     */
239
    private boolean isExcludedColumn(final ResultSet row, final Column column) throws SQLException {
240
241
        final String columnName = column.getName();
242
243
        final List<Exclude> exclusions = column.getExclusions();
244
        boolean hasInclusions = false;
245
        boolean passedInclusion = false;
246
247
        if (exclusions != null) {
248
            for (final Exclude exc : exclusions) {
249
                String name = exc.getName();
250
                final String eq = exc.getEquals();
251
                final String lk = exc.getLike();
252
                final String neq = exc.getNotEquals();
253
                final String nlk = exc.getNotLike();
254
                final boolean nl = exc.isExcludeNull();
255
                if (name == null || name.length() == 0) {
256
                    name = columnName;
257
                }
258
                final String testValue = row.getString(name);
259
260
                if (nl && testValue == null) {
261
                    return true;
262
                } else if (eq != null && eq.equals(testValue)) {
263
                    return true;
264
                } else if (lk != null && lk.length() != 0) {
265
                    final LikeMatcher matcher = new LikeMatcher(lk);
266
                    if (matcher.matches(testValue)) {
267
                        return true;
268
                    }
269
                }
270
271
                if (neq != null) {
272
                    hasInclusions = true;
273
                    if (neq.equals(testValue)) {
274
                        passedInclusion = true;
275
                    }
276
                }
277
                if (nlk != null && nlk.length() != 0) {
278
                    hasInclusions = true;
279
                    final LikeMatcher matcher = new LikeMatcher(nlk);
280
                    if (matcher.matches(testValue)) {
281
                        passedInclusion = true;
282
                    }
283
                }
284
            }
285
        }
286
287
        return hasInclusions && !passedInclusion;
288
    }
289
290
    /**
291
     * Returns the passed colValue truncated to the column's size in the table.
292
     *
293
     * @param colValue
294
     * @param colName
295
     * @param columnMetaData
296
     * @return
297
     * @throws SQLException
298
     */
299
    private Object getTruncatedColumnValue(final Object colValue, final String colName, final TableMetaData tableMetaData) throws SQLException {
300
        final ColumnMetaData col = tableMetaData.getColumn(colName);
301
        final int colSize = col.getColumnSize();
302
        final Class clazz = col.getColumnType();
303
        if (clazz.equals(String.class) && colValue.toString().length() > colSize) {
304
            return colValue.toString().substring(0, colSize);
305
        }
306
        return colValue;
307
    }
308
309
    /**
310
     * Anonymizes a row of columns.
311
     *
312
     * Sets query parameters on the passed updateStmt - this includes the key
313
     * values - and calls anonymization functions for the columns.
314
     *
315
     * @param updateStmt
316
     * @param tableColumns
317
     * @param keyNames
318
     * @param db
319
     * @param row
320
     * @param columnMetaData
321
     * @throws SQLException
322
     * @throws NoSuchMethodException
323
     * @throws SecurityException
324
     * @throws IllegalAccessException
325
     * @throws IllegalArgumentException
326
     * @throws InvocationTargetException
327
     */
328
    private void anonymizeRow(
329
        final PreparedStatement updateStmt,
330
        final Collection<Column> tableColumns,
331
        final Collection<String> keyNames,
332
        final ResultSet row,
333
        final TableMetaData tableMetaData
334
    ) throws SQLException,
335
             NoSuchMethodException,
336
             SecurityException,
337
             IllegalAccessException,
338
             IllegalArgumentException,
339
             InvocationTargetException,
340
             DatabaseException,
341
             InstantiationException {
342
343
        int fieldIndex = 0;
344
        final Map<String, Integer> columnIndexes = new HashMap<>(tableColumns.size());
345
        final Set<String> anonymized = new HashSet<>(tableColumns.size());
346
347
        
348
        log.debug("Temporary debug line: before Column loop  ");
349
        
350
        for (final Column column : tableColumns) {
351
            final String columnName = column.getName();
352
            if (anonymized.contains(columnName)) {
353
                continue;
354
            }
355
            if (!columnIndexes.containsKey(columnName)) {
356
                final int columnIndex = ++fieldIndex;
357
                columnIndexes.put(columnName, columnIndex);
358
            }
359
            if (isExcludedColumn(row, column)) {
360
                final String columnValue = row.getString(columnName);
361
                updateStmt.setString(columnIndexes.get(columnName), columnValue);
362
                log.debug("Excluding column: " + columnName + " with value: " + columnValue);
363
                continue;
364
            }
365
366
            log.debug("Invoking column plan for {}", columnName);
367
            anonymized.add(columnName);
368
            final Object colValue = column.invokeFunctionChain(row);
369
            log.debug("colValue = " + colValue);
370
            log.debug("type= " + (colValue != null ? colValue.getClass() : "null"));
371
            if (colValue == null) {
372
                updateStmt.setNull(columnIndexes.get(columnName), Types.NULL);
373
            } else {
374
                updateStmt.setObject(
375
                    columnIndexes.get(columnName),
376
                    getTruncatedColumnValue(
377
                        colValue,
378
                        columnName,
379
                        tableMetaData
380
                    )
381
                );
382
            }
383
        }
384
385
        int whereIndex = fieldIndex;
386
        for (final String key : keyNames) {
387
            updateStmt.setString(++whereIndex, row.getString(key));
388
        }
389
390
        updateStmt.addBatch();
391
    }
392
393
    /**
394
     * Anonymization function for a single table.
395
     *
396
     * Sets up queries, loops over columns and anonymizes columns for the passed
397
     * Table.
398
     *
399
     * @param table
400
     */
401
    private void anonymizeTable(final Table table) throws DatabaseException, InstantiationException {
402
403
        if (StringUtils.isBlank(table.getWhere())) {
404
            log.info("Table [" + table.getName() + "]. Start ...");
405
        } else {
406
            log.info("Table [" + table.getName() + ", where=" + table.getWhere() + "]. Start ...");
407
        }
408
409
        final List<Column> tableColumns = table.getColumns();
410
        // colNames is looked up with contains, and iterated over.  Using LinkedHashSet means
411
        // duplicate column names won't be added to the query, so a check in the column loop
412
        // below was created to ensure a reasonable warning message is logged if that happens.
413
        final Set<String> colNames = new LinkedHashSet<>(tableColumns.size());
414
        // keyNames is only iterated over, so no need for a hash set
415
        final List<String> keyNames = table.getPrimaryKeyColumnNames();
416
417
        fillColumnNames(table, colNames);
418
419
        // required in this scope for 'catch' block
420
        PreparedStatement countQuery = null;
421
        PreparedStatement selectStmt = null;
422
        PreparedStatement updateStmt = null;
423
        ResultSet rs = null;
424
        final Connection updateCon = dbFactory.getUpdateConnection();
425
426
        try {
427
428
            countQuery = getSelectQueryStatement(dbFactory, table, null, List.of("COUNT(*)"));
429
            rs = countQuery.executeQuery();
430
            int total = 0;
431
            if (rs.next()) {
432
                total = rs.getInt(1);
433
            }
434
            rs.close();
435
436
            selectStmt = getSelectQueryStatement(dbFactory, table, keyNames, colNames);
437
            rs = selectStmt.executeQuery();
438
439
            final TableMetaData tableMetaData = dbFactory.fetchMetaData().getMetaDataFor(rs);
440
441
            final String updateString = getUpdateQuery(table, colNames, keyNames);
442
            updateStmt = updateCon.prepareStatement(updateString);
443
444
            int batchCounter = 0;
445
            int rowCount = 0;
446
447
            try (ProgressBar pb = new ProgressBar("Anonymizing table " + table.getName() + "...", total)) {
448
                while (rs.next()) {
449
                    anonymizeRow(updateStmt, tableColumns, keyNames, rs, tableMetaData);
450
                    batchCounter++;
451
                    if (batchCounter == batchSize) {
452
                        updateStmt.executeBatch();
453
                        updateCon.commit();
454
                        batchCounter = 0;
455
                    }
456
                    pb.step();
457
                    rowCount++;
458
                }
459
            }
460
461
            log.debug("Rows processed: " + rowCount);
462
463
            updateStmt.executeBatch();
464
            log.debug("Batch executed");
465
            updateCon.commit();
466
            log.debug("Commit");
467
            selectStmt.close();
468
            updateStmt.close();
469
            rs.close();
470
            log.debug("Closing open resources");
471
472
        } catch (SQLException | NoSuchMethodException | SecurityException | IllegalAccessException |
473
                 IllegalArgumentException | InvocationTargetException | DataDefenderException ex ) {
474
            log.error(ex.toString());
475
            if (ex.getCause() != null) {
476
                log.error(ex.getCause().toString());
477
            }
478
            throw new DatabaseException("Exception anonymizing table", ex);
479
        } finally {
480
            try {
481
                if (countQuery != null) {
482
                    countQuery.close();
483
                }
484
                if (selectStmt != null) {
485
                    selectStmt.close();
486
                }
487
                if (updateStmt != null) {
488
                    updateStmt.close();
489
                }
490
                if (rs != null) {
491
                    rs.close();
492
                }
493
            } catch (SQLException sqlex) {
494
                log.error(sqlex.toString());
495
            }
496
        }
497
498
        log.info("Table " + table.getName() + ". End ...");
499
        log.info("");
500
    }
501
502
    @Override
503
    public void anonymize() throws DataDefenderException, InstantiationException {
504
        log.info("Anonymizing data for project: {} version: {}", requirement.getProject(), requirement.getVersion());
505
        for (final Table reqTable : requirement.getFilteredTables(tables)) {
506
            anonymizeTable(reqTable);
507
        }
508
    }
509
}
510