1
|
|
|
""" |
2
|
|
|
Data Manipulation for Time Series class |
3
|
|
|
""" |
4
|
|
|
# package to handle date and times |
5
|
|
|
from datetime import timedelta |
6
|
|
|
# package facilitating Data Frames manipulation |
7
|
|
|
import pandas |
8
|
|
|
# custom classes |
9
|
|
|
from db_extractor.BasicNeeds import BasicNeeds |
10
|
|
|
|
11
|
|
|
|
12
|
|
|
class DataManipulatorForTimeSeries:
    """Helpers adding or converting date related columns on pandas Data Frames.

    Every method that receives a Data Frame modifies it in place; all of them
    except fn_get_first_current_and_last_column_value_from_data_frame also
    return that same Data Frame.
    """

    # BasicNeeds helper, replaced by a real instance in __init__
    class_bn = None

    def __init__(self, in_language='en_US'):
        """Create the BasicNeeds helper, passing in_language through to it."""
        self.class_bn = BasicNeeds(in_language)

    @staticmethod
    def fn_add_days_within_column_to_data_frame(input_data_frame, dict_expression):
        """Add a 'Days Within' column holding the inclusive day count of a span.

        :param input_data_frame: Data Frame to modify (changed in place)
        :param dict_expression: dictionary whose 'Start Date' and 'End Date'
            values are the names of the columns delimiting the span
        :return: the same Data Frame, with the 'Days Within' column set
        """
        # one extra day so that both ends of the interval are counted
        duration = input_data_frame[dict_expression['End Date']] \
            - input_data_frame[dict_expression['Start Date']] \
            + timedelta(days=1)
        # read the day component directly instead of parsing the textual form,
        # which failed for negative spans and for non-midnight time parts
        input_data_frame['Days Within'] = pandas.to_timedelta(duration).dt.days
        return input_data_frame

    def fn_add_timeline_evaluation_column_to_data_frame(self, in_df, dict_expression):
        """Add a 'Timeline Evaluation' column with 'Past', 'Current' or 'Future'.

        A row is 'Current' when 'Start Date' <= reference <= 'End Date',
        'Past' when it is not current and 'Start Date' < reference,
        and 'Future' in any other case.

        :param in_df: Data Frame with 'Start Date' and 'End Date' columns
            (changed in place)
        :param dict_expression: dictionary providing 'Reference Date' and,
            optionally, 'Remove Reference Date'
        :return: the same Data Frame, with 'Timeline Evaluation' added and
            'Reference Date' either kept or dropped
        """
        # shorten last method parameter
        de = dict_expression
        # helper column, added before the evaluation so column order is stable
        in_df['Reference Date'] = de['Reference Date']
        start_date = in_df['Start Date']
        end_date = in_df['End Date']
        reference_date = in_df['Reference Date']
        # whole-column comparisons: no temporary renaming is needed, so an
        # empty Data Frame works and columns named 'sd', 'ed' or 'rd' that
        # may already exist are never touched
        is_current = ((start_date <= reference_date)
                      & (reference_date <= end_date)).to_numpy()
        is_past = (start_date < reference_date).to_numpy()
        evaluation = pandas.Series('Future', index=in_df.index, dtype=object)
        # 'Current' is applied last so it takes precedence over 'Past'
        evaluation = evaluation.mask(is_past, 'Past').mask(is_current, 'Current')
        in_df['Timeline Evaluation'] = evaluation
        # decide if the helpful column is to be retained or not
        # NOTE(review): presumably True when 'Remove Reference Date' is missing
        # or explicitly true - confirm against BasicNeeds
        removal_needed = self.class_bn.fn_decide_by_omission_or_specific_true(
            de, 'Remove Reference Date')
        if removal_needed:
            in_df.drop(columns=['Reference Date'], inplace=True)
        return in_df

    @staticmethod
    def fn_add_weekday_columns_to_data_frame(input_data_frame, columns_list):
        """Add a 'Weekday for <column>' column for every column given.

        :param input_data_frame: Data Frame to modify (changed in place)
        :param columns_list: names of columns whose values support strftime
        :return: the same Data Frame, with the weekday name columns added
        """
        for current_column in columns_list:
            input_data_frame['Weekday for ' + current_column] = \
                input_data_frame[current_column].apply(lambda x: x.strftime('%A'))
        return input_data_frame

    @staticmethod
    def fn_convert_datetime_columns_to_string(input_data_frame, columns_list, columns_format):
        """Replace the given columns with their strftime representation.

        :param input_data_frame: Data Frame to modify (changed in place)
        :param columns_list: names of columns whose values support strftime
        :param columns_format: strftime format applied to every value
        :return: the same Data Frame, with the columns converted
        """
        for current_column in columns_list:
            input_data_frame[current_column] = \
                input_data_frame[current_column].map(lambda x: x.strftime(columns_format))
        return input_data_frame

    @staticmethod
    def fn_convert_string_columns_to_datetime(input_data_frame, columns_list, columns_format):
        """Parse the given columns with pandas.to_datetime.

        :param input_data_frame: Data Frame to modify (changed in place)
        :param columns_list: names of the columns to parse
        :param columns_format: format handed over to pandas.to_datetime
        :return: the same Data Frame, with the columns converted
        """
        for current_column in columns_list:
            input_data_frame[current_column] = pandas.to_datetime(
                input_data_frame[current_column], format=columns_format)
        return input_data_frame

    @staticmethod
    def fn_get_first_current_and_last_column_value_from_data_frame(in_data_frame, in_column_name):
        """Return the first, current and last value of a column.

        :param in_data_frame: Data Frame having a 'Timeline Evaluation' column
        :param in_column_name: name of the column to read the values from
        :return: dictionary with keys 'first' (value on the first row),
            'current' (maximum among rows evaluated as "Current") and
            'last' (value on the last row)
        :raises IndexError: when the Data Frame has no rows
        """
        # pick the column first: going through a whole row would build a
        # mixed type Series and could silently upcast the value
        column_values = in_data_frame[in_column_name]
        current_rows = in_data_frame['Timeline Evaluation'] == 'Current'
        return {
            'first': column_values.iloc[0],
            'current': column_values[current_rows].max(),
            'last': column_values.iloc[-1],
        }
79
|
|
|
|