Conditions | 5 |
Total Lines | 146 |
Code Lines | 47 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.
There are several approaches to avoid long parameter lists:
1 | ''' |
||
def corr_plot(data, split=None, threshold=0, target=None, method='pearson', cmap='BrBG', figsize=(12, 10), annot=True,
              dev=False, **kwargs):
    '''
    Two-dimensional visualization of the correlation between feature-columns, excluding NA values.

    Parameters
    ----------
    data: 2D dataset that can be coerced into Pandas DataFrame. If a Pandas DataFrame is provided, the index/column
        information is used to label the plots.

    split: {None, 'pos', 'neg', 'high', 'low'}, default None
        Type of split to be performed.

        * None: visualize all correlations between the feature-columns.
        * pos: visualize all positive correlations between the feature-columns above the threshold.
        * neg: visualize all negative correlations between the feature-columns below the threshold.
        * high: visualize all correlations between the feature-columns for which abs(corr) > threshold is True.
        * low: visualize all correlations between the feature-columns for which abs(corr) < threshold is True.

    threshold: float, default 0
        Value between 0 <= threshold <= 1

    target: string, list, np.array or pd.Series, default None
        Specify target for correlation. E.g. label column to generate only the correlations between each feature
        and the label. A string is looked up as a column of data and removed from the features; any other accepted
        type is converted to a pd.Series. The correlation with the target is computed with the given method.
        Values of any other type are ignored and the full correlation matrix is plotted.

    method: {'pearson', 'spearman', 'kendall'}, default 'pearson'
        * pearson: measures linear relationships and requires normally distributed and homoscedastic data.
        * spearman: ranked/ordinal correlation, measures monotonic relationships.
        * kendall: ranked/ordinal correlation, measures monotonic relationships. Computationally more expensive but
            more robust in smaller datasets than 'spearman'.

    cmap: matplotlib colormap name or object, or list of colors, default 'BrBG'
        The mapping from data values to color space.

    figsize: tuple, default (12, 10)
        Use to control the figure size.

    annot: bool, default True
        Use to show or hide annotations.

    dev: bool, default False
        Display figure settings in the plot by setting dev = True. If False, the settings are not displayed.

    **kwargs: optional
        Additional elements to control the visualization of the plot, e.g.:

        * mask: bool, default True
            If set to False the entire correlation matrix, including the upper triangle is shown. Set dev = False in
            this case to avoid overlap.
        * vmax: float, default is calculated from the given correlation coefficients.
            Value between -1 or vmin <= vmax <= 1, limits the range of the colorbar.
        * vmin: float, default is calculated from the given correlation coefficients.
            Value between -1 <= vmin <= 1 or vmax, limits the range of the colorbar.
        * linewidths: float, default 0.5
            Controls the line-width in between the squares.
        * annot_kws: dict, default {'size' : 10}
            Controls the font size of the annotations. Only available when annot = True.
        * cbar_kws: dict, default {'shrink': .95, 'aspect': 30}
            Controls the size of the colorbar.
        * Many more kwargs are available, i.e. 'alpha' to control blending, or options to adjust labels, ticks ...

        Kwargs can be supplied through a dictionary of key-value pairs (see above).

    Returns
    -------
    ax: matplotlib Axes
        Returns the Axes object with the plot for further tweaking.
    '''

    # Validate Inputs
    _validate_input_0_1(threshold, 'threshold')
    _validate_input_bool(annot, 'annot')
    _validate_input_bool(dev, 'dev')

    data = pd.DataFrame(data)
    mask = False
    square = False

    # Obtain correlations
    if isinstance(target, (str, list, pd.Series, np.ndarray)):
        if isinstance(target, str):
            # The target is one of the columns: separate it from the features.
            target_data = data[target]
            data = data.drop(target, axis=1)
        else:
            target_data = pd.Series(target)

        # Forward `method` so the computed coefficients match the method named in the plot title.
        corr = pd.DataFrame(data.corrwith(target_data, method=method))
        corr = _corr_selector(corr, split=split, threshold=threshold)

        # Pull the colorbar limits slightly inside the observed range.
        vmax = np.round(np.nanmax(corr) - 0.05, 2)
        vmin = np.round(np.nanmin(corr) + 0.05, 2)

    else:
        corr = corr_mat(data, split=split, threshold=threshold, method=method).data

        # Generate mask for the upper triangle. The builtin `bool` is used because the
        # `np.bool` alias is no longer available in current NumPy releases.
        mask = np.triu(np.ones_like(corr, dtype=bool))
        square = True

        # Only the cells that remain visible under the mask determine the colorbar limits.
        vmax = np.round(np.nanmax(corr.where(~mask)) - 0.05, 2)
        vmin = np.round(np.nanmin(corr.where(~mask)) + 0.05, 2)

    fig, ax = plt.subplots(figsize=figsize)

    # Specify kwargs for the heatmap; user-supplied kwargs come last and override the defaults.
    kwargs = {'mask': mask,
              'cmap': cmap,
              'annot': annot,
              'vmax': vmax,
              'vmin': vmin,
              'linewidths': .5,
              'annot_kws': {'size': 10},
              'cbar_kws': {'shrink': .95, 'aspect': 30},
              'ax': ax,  # draw on the Axes created above instead of relying on the implicit current Axes
              **kwargs}

    # Draw heatmap with mask and some default settings
    sns.heatmap(corr,
                center=0,
                square=square,
                fmt='.2f',
                **kwargs
                )

    ax.set_title(f'Feature-correlation ({method})', fontdict={'fontsize': 18})

    # Display settings; vmax/vmin are read from kwargs so that user overrides are reported correctly.
    if dev:
        fig.suptitle(f"\
            Settings (dev-mode): \n\
            - split-mode: {split} \n\
            - threshold: {threshold} \n\
            - method: {method} \n\
            - annotations: {annot} \n\
            - cbar: \n\
                - vmax: {kwargs['vmax']} \n\
                - vmin: {kwargs['vmin']} \n\
            - linewidths: {kwargs['linewidths']} \n\
            - annot_kws: {kwargs['annot_kws']} \n\
            - cbar_kws: {kwargs['cbar_kws']}",
                     fontsize=12,
                     color='gray',
                     x=0.35,
                     y=0.85,
                     ha='left')

    return ax
||
212 | |||
462 |