db_extractor.DataManipulatorForTimeSeries   A
last analyzed

Complexity

Total Complexity 17

Size/Duplication

Total Lines 78
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 57
dl 0
loc 78
rs 10
c 0
b 0
f 0
wmc 17

7 Methods

Rating   Name   Duplication   Size   Complexity  
A DataManipulatorForTimeSeries.fn_convert_string_columns_to_datetime() 0 6 2
A DataManipulatorForTimeSeries.fn_add_weekday_columns_to_data_frame() 0 6 3
A DataManipulatorForTimeSeries.__init__() 0 2 1
A DataManipulatorForTimeSeries.fn_add_timeline_evaluation_column_to_data_frame() 0 22 5
A DataManipulatorForTimeSeries.fn_add_days_within_column_to_data_frame() 0 8 2
A DataManipulatorForTimeSeries.fn_get_first_current_and_last_column_value_from_data_frame() 0 7 1
A DataManipulatorForTimeSeries.fn_convert_datetime_columns_to_string() 0 6 3
1
"""
2
Data Manipulation for Time Series class
3
"""
4
# package to handle date and times
5
from datetime import timedelta
6
# package facilitating Data Frames manipulation
7
import pandas
8
# custom classes
9
from db_extractor.BasicNeeds import BasicNeeds
10
11
12
class DataManipulatorForTimeSeries:
    """
    Helpers that add time-series related columns to pandas Data Frames

    All methods that receive a Data Frame modify it in place and also
    return it, so calls can be chained or used for their side effect.
    """
    # localized helper (BasicNeeds instance), set by the constructor
    class_bn = None

    def __init__(self, in_language='en_US'):
        """
        Build the BasicNeeds helper used by the instance methods

        :param in_language: language code passed as-is to BasicNeeds
        """
        self.class_bn = BasicNeeds(in_language)

    @staticmethod
    def fn_add_days_within_column_to_data_frame(input_data_frame, dict_expression):
        """
        Add a "Days Within" column holding the inclusive day count of a range

        The value is End Date - Start Date + 1 day, expressed in whole days.

        :param input_data_frame: Data Frame to enrich (modified in place)
        :param dict_expression: dictionary whose 'Start Date' and 'End Date'
            entries hold the NAMES of the columns delimiting the range
        :return: the same Data Frame, with the "Days Within" column added
        """
        inclusive_span = input_data_frame[dict_expression['End Date']] - \
            input_data_frame[dict_expression['Start Date']] + \
            timedelta(days=1)
        # read the day component straight from the timedelta values instead of
        # parsing their text form, which failed for negative spans
        # (rendered as "-4 days +00:00:00") and for spans with a time part
        input_data_frame['Days Within'] = pandas.to_timedelta(inclusive_span).dt.days
        return input_data_frame

    @staticmethod
    def _fn_evaluate_timeline_position(start_value, reference_value, end_value):
        """
        Classify a single [start, end] interval against a reference value

        :return: 'Current' when the reference lies within the interval
            (both ends included), 'Past' when the interval started before
            the reference, 'Future' otherwise
        """
        if start_value <= reference_value <= end_value:
            return 'Current'
        if start_value < reference_value:
            return 'Past'
        return 'Future'

    def fn_add_timeline_evaluation_column_to_data_frame(self, in_df, dict_expression):
        """
        Add a "Timeline Evaluation" column (Past / Current / Future)

        Each row's "Start Date" and "End Date" are compared with
        dict_expression['Reference Date'].

        :param in_df: Data Frame with "Start Date" and "End Date" columns
            (modified in place)
        :param dict_expression: dictionary with a 'Reference Date' entry and,
            optionally, a 'Remove Reference Date' entry
        :return: the same Data Frame, with "Timeline Evaluation" added; the
            helper "Reference Date" column is dropped or retained according to
            BasicNeeds.fn_decide_by_omission_or_specific_true for the
            'Remove Reference Date' key
        """
        # helper column holding the reference value on every row
        in_df['Reference Date'] = dict_expression['Reference Date']
        # element-wise classification: no temporary renaming of the caller's
        # columns is needed and an empty Data Frame simply gets an empty column
        in_df['Timeline Evaluation'] = [
            self._fn_evaluate_timeline_position(start_value, reference_value, end_value)
            for start_value, reference_value, end_value in zip(in_df['Start Date'],
                                                               in_df['Reference Date'],
                                                               in_df['End Date'])
        ]
        # decide if the helper column is to be retained or not
        removal_needed = self.class_bn.fn_decide_by_omission_or_specific_true(
            dict_expression, 'Remove Reference Date')
        if removal_needed:
            in_df.drop(columns=['Reference Date'], inplace=True)
        return in_df

    @staticmethod
    def fn_add_weekday_columns_to_data_frame(input_data_frame, columns_list):
        """
        Add a "Weekday for <column>" column for every given column

        The weekday name is produced by strftime('%A') on each value.

        :param input_data_frame: Data Frame to enrich (modified in place)
        :param columns_list: names of the columns to derive weekdays from
        :return: the same Data Frame, with the weekday columns added
        """
        for current_column in columns_list:
            input_data_frame['Weekday for ' + current_column] = \
                input_data_frame[current_column].apply(lambda x: x.strftime('%A'))
        return input_data_frame

    @staticmethod
    def fn_convert_datetime_columns_to_string(input_data_frame, columns_list, columns_format):
        """
        Replace the given columns with their strftime text representation

        :param input_data_frame: Data Frame to convert (modified in place)
        :param columns_list: names of the columns to convert
        :param columns_format: strftime format applied to every value
        :return: the same Data Frame, with the columns converted to text
        """
        for current_column in columns_list:
            input_data_frame[current_column] = \
                input_data_frame[current_column].map(lambda x: x.strftime(columns_format))
        return input_data_frame

    @staticmethod
    def fn_convert_string_columns_to_datetime(input_data_frame, columns_list, columns_format):
        """
        Parse the given text columns with pandas.to_datetime

        :param input_data_frame: Data Frame to convert (modified in place)
        :param columns_list: names of the columns to convert
        :param columns_format: format handed to pandas.to_datetime
        :return: the same Data Frame, with the columns parsed
        """
        for current_column in columns_list:
            input_data_frame[current_column] = pandas.to_datetime(input_data_frame[current_column],
                                                                  format=columns_format)
        return input_data_frame

    @staticmethod
    def fn_get_first_current_and_last_column_value_from_data_frame(in_data_frame, in_column_name):
        """
        Get the first, "Current" and last values of a column

        :param in_data_frame: Data Frame having a "Timeline Evaluation" column
        :param in_column_name: name of the column to read values from
        :return: dictionary with 'first' (value on the first row), 'current'
            (maximum value among rows evaluated as "Current") and 'last'
            (value on the last row)
        """
        current_rows = in_data_frame.query('`Timeline Evaluation` == "Current"',
                                           inplace=False)
        return {
            'first': in_data_frame.iloc[0][in_column_name],
            'current': current_rows[in_column_name].max(),
            'last': in_data_frame.iloc[-1][in_column_name],
        }
79