Conditions | 7 |
Total Lines | 124 |
Code Lines | 43 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment then serves as a starting point for choosing a good name for that new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | ''' |
||
def corr_plot(data, split=None, threshold=0, cmap=sns.color_palette("BrBG", 250),
              figsize=(12, 10), dev=False, **kwargs):
    '''
    Two-dimensional visualization of the correlation between feature-columns, excluding NA values.

    Parameters:
    ----------
    data: 2D dataset that can be coerced into a Pandas DataFrame. If a Pandas DataFrame is provided,
        the index/column information will be used to label the columns and rows.

    split: {None, 'pos', 'neg', 'high', 'low'}, default None
        Type of split to be performed. Any other value behaves like None.

        * None: visualize all correlations between the feature-columns.
        * pos: visualize all positive correlations (corr > 0) that are also >= threshold.
        * neg: visualize all negative correlations (corr < 0) that are also <= threshold.
        * high: visualize all correlations for which abs(corr) >= threshold is True.
        * low: visualize all correlations for which abs(corr) <= threshold is True.

    threshold: float, default 0
        Value between 0 <= threshold <= 1. For split = 'neg' the comparison is corr <= threshold,
        so a value between -1 and 0 is needed to narrow the result.

    cmap: matplotlib colormap name or object, or list of colors, default sns.color_palette("BrBG", 250)
        The mapping from data values to color space.

    figsize: tuple, default (12, 10)
        Use to control the figure size.

    dev: bool, default False
        Display figure settings in the plot by setting dev = True. If False, the settings are not
        displayed. Use for presentations.

    **kwargs: optional
        Additional keyword arguments forwarded to sns.heatmap. They override the defaults below:

        * ax: matplotlib Axes, default is the Axes of a newly created figure.
        * mask: default is a boolean array hiding the upper triangle (including the diagonal).
            If set to False the entire correlation matrix, including the upper triangle is shown.
            Set dev = False in this case to avoid overlap.
        * annot: bool, default True for 20 or less columns, False for more than 20 feature-columns.
        * vmax: float, default is the largest visible (lower-triangle) coefficient minus 0.05.
            Value between -1 or vmin <= vmax <= 1, limits the range of the colorbar.
        * vmin: float, default is the smallest visible (lower-triangle) coefficient plus 0.05.
            Value between -1 <= vmin <= 1 or vmax, limits the range of the colorbar.
        * center: float, default 0
        * square: bool, default True
        * fmt: str, default '.2f'
        * linewidths: float, default 0.5
            Controls the line-width inbetween the squares.
        * annot_kws: dict, default {'size' : 10}
            Controls the font size of the annotations. Only available when annot = True.
        * cbar_kws: dict, default {'shrink': .95, 'aspect': 30}
            Controls the size of the colorbar.
        * Many more kwargs are available, i.e. 'alpha' to control blending, or options to adjust
            labels, ticks ...

        Kwargs can be supplied through a dictionary of key-value pairs (see above).

    Returns:
    -------
    ax: matplotlib Axes. Axes object with the heatmap.
    '''
    # Local import: only needed to honour the documented "array-like" input contract.
    import pandas as pd

    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)

    # Compute the correlation matrix once; every split mode only filters this matrix.
    full_corr = data.corr()

    # Labels shown in the dev-mode overlay (kept separate from the numeric inputs).
    split_label = split
    threshold_label = threshold

    if split == 'pos':
        corr = full_corr.where((full_corr >= threshold) & (full_corr > 0))
        # NOTE(review): the overlay shows '-' here although the threshold is applied
        # above; kept as in the previous implementation - confirm this is intended.
        threshold_label = '-'
    elif split == 'neg':
        corr = full_corr.where((full_corr <= threshold) & (full_corr < 0))
        threshold_label = '-'
    elif split == 'high':
        corr = full_corr.where(np.abs(full_corr) >= threshold)
    elif split == 'low':
        corr = full_corr.where(np.abs(full_corr) <= threshold)
    else:
        corr = full_corr
        split_label = "full"
        threshold_label = 'None'

    # Generate mask for the upper triangle (diagonal included).
    # The builtin bool is used because the np.bool alias no longer exists in current NumPy.
    mask = np.triu(np.ones(corr.shape, dtype=bool))

    # Compute dimensions and correlation range to adjust settings.
    # Only the cells left visible by the default mask contribute to the colorbar range.
    visible = corr.where(~mask)
    annot = bool(max(corr.shape) < 21)
    vmax = np.round(np.nanmax(visible) - 0.05, 2)
    vmin = np.round(np.nanmin(visible) + 0.05, 2)

    # Set up the matplotlib figure
    fig, ax = plt.subplots(figsize=figsize)

    # Defaults for the heatmap; anything supplied by the caller wins. center/square/fmt
    # live here as well so that overriding them does not raise a duplicate-keyword error.
    heatmap_kwargs = {
        'ax': ax,
        'mask': mask,
        'cmap': cmap,
        'annot': annot,
        'vmax': vmax,
        'vmin': vmin,
        'center': 0,
        'square': True,
        'fmt': '.2f',
        'linewidths': .5,
        'annot_kws': {'size': 10},
        'cbar_kws': {'shrink': .95, 'aspect': 30},
        **kwargs,
    }

    # Draw heatmap with mask and some default settings
    ax = sns.heatmap(corr, **heatmap_kwargs)

    ax.set_title('Feature-correlation Matrix', fontdict={'fontsize': 18})

    if dev:  # show settings
        # Built from adjacent literals so the rendered text does not depend on source
        # indentation; values are read from heatmap_kwargs to reflect caller overrides.
        settings = (
            "Settings (dev-mode): \n"
            f"- split-mode: {split_label} \n"
            f"- threshold: {threshold_label} \n"
            "- cbar: \n"
            f"- vmax: {heatmap_kwargs['vmax']} \n"
            f"- vmin: {heatmap_kwargs['vmin']} \n"
            f"- linewidths: {heatmap_kwargs['linewidths']} \n"
            f"- annot_kws: {heatmap_kwargs['annot_kws']} \n"
            f"- cbar_kws: {heatmap_kwargs['cbar_kws']}"
        )
        fig.suptitle(settings,
                     fontsize=12,
                     color='gray',
                     x=0.35,
                     y=0.8,
                     ha='left')

    return ax
||
251 | |||
263 |