| Conditions | 7 |
| Total Lines | 124 |
| Code Lines | 43 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Method with Method Object.
| 1 | ''' |
||
def corr_plot(data, split=None, threshold=0, cmap=sns.color_palette("BrBG", 250), figsize=(12, 10), dev=False, **kwargs):
    '''
    Two-dimensional visualization of the correlation between feature-columns, excluding NA values.

    Parameters:
    ----------
    data: 2D dataset that can be coerced into a Pandas DataFrame. If a Pandas DataFrame is provided, \
the index/column information will be used to label the columns and rows.

    split: {None, 'pos', 'neg', 'high', 'low'}, default None
        Type of split to be performed. Any other value behaves like None.

        * None: visualize all correlations between the feature-columns.
        * pos: visualize all positive correlations between the feature-columns that are >= threshold.
        * neg: visualize all negative correlations between the feature-columns that are <= threshold.
        * high: visualize all correlations between the feature-columns for which abs(corr) >= threshold is True.
        * low: visualize all correlations between the feature-columns for which abs(corr) <= threshold is True.

    threshold: float, default 0
        Value between 0 <= threshold <= 1

    cmap: matplotlib colormap name or object, or list of colors, default sns.color_palette("BrBG", 250)
        The mapping from data values to color space.

    figsize: tuple, default (12, 10)
        Use to control the figure size.

    dev: bool, default False
        Display figure settings in the plot by setting dev = True. If False, the settings are not displayed. \
Use for presentations.

    **kwargs: optional
        Additional elements to control the visualization of the plot. They are forwarded to sns.heatmap and \
override the defaults listed here, e.g.:

        * mask: bool or array, default True
            True masks the upper triangle (including the diagonal). If set to False the entire correlation \
matrix, including the upper triangle is shown. Set dev = False in this case to avoid overlap.
        * annot: bool, default True for 20 or less columns, False for more than 20 feature-columns.
        * vmax: float, default is calculated from the given correlation coefficients.
            Value between -1 or vmin <= vmax <= 1, limits the range of the colorbar.
        * vmin: float, default is calculated from the given correlation coefficients.
            Value between -1 <= vmin <= 1 or vmax, limits the range of the colorbar.
        * linewidths: float, default 0.5
            Controls the line-width inbetween the squares.
        * annot_kws: dict, default {'size' : 10}
            Controls the font size of the annotations. Only available when annot = True.
        * cbar_kws: dict, default {'shrink': .95, 'aspect': 30}
            Controls the size of the colorbar.
        * Many more kwargs are available, i.e. 'alpha' to control blending, or options to adjust labels, ticks ...

        Kwargs can be supplied through a dictionary of key-value pairs (see above).

    Returns:
    -------
    ax: matplotlib Axes. Axes object with the heatmap.
    '''

    # The correlation is computed via the DataFrame API; coerce other 2D inputs
    # (lists, ndarrays) so that the documented input contract actually holds.
    if not hasattr(data, 'corr'):
        import pandas as pd
        data = pd.DataFrame(data)

    # Compute the correlation matrix once and derive every split from it.
    full_corr = data.corr()

    if split == 'pos':
        corr = full_corr.where((full_corr >= threshold) & (full_corr > 0))
        # NOTE(review): the dev overlay shows '-' for 'pos'/'neg' although the
        # threshold is applied above; kept as in the original.
        threshold = '-'
    elif split == 'neg':
        corr = full_corr.where((full_corr <= threshold) & (full_corr < 0))
        threshold = '-'
    elif split == 'high':
        corr = full_corr.where(np.abs(full_corr) >= threshold)
    elif split == 'low':
        corr = full_corr.where(np.abs(full_corr) <= threshold)
    else:
        corr = full_corr
        split = "full"
        threshold = 'None'

    # Generate mask for the upper triangle (np.bool was removed from NumPy; use the builtin)
    upper_triangle = np.triu(np.ones_like(corr, dtype=bool))

    # Compute dimensions and correlation range to adjust settings.
    # The colour range is taken from the visible lower triangle and pulled in by 0.05.
    visible = corr.where(~upper_triangle)
    annot = max(corr.shape) < 21
    vmax = np.round(np.nanmax(visible) - 0.05, 2)
    vmin = np.round(np.nanmin(visible) + 0.05, 2)

    # Set up the matplotlib figure
    fig, ax = plt.subplots(figsize=figsize)

    # Defaults for the heatmap; anything supplied by the caller takes precedence
    settings = {'mask': upper_triangle,
                'cmap': cmap,
                'annot': annot,
                'vmax': vmax,
                'vmin': vmin,
                'linewidths': .5,
                'annot_kws': {'size': 10},
                'cbar_kws': {'shrink': .95, 'aspect': 30},
                **kwargs}

    # The docstring advertises mask=True as "mask the upper triangle"; a scalar
    # True handed to seaborn would hide every cell, so translate it here.
    if settings['mask'] is True:
        settings['mask'] = upper_triangle

    # Draw heatmap with mask and some default settings
    sns.heatmap(corr,
                center=0,
                square=True,
                fmt='.2f',
                **settings
                )

    ax.set_title('Feature-correlation Matrix', fontdict={'fontsize': 18})

    if dev:  # show settings; vmax/vmin are read from the effective settings, not the computed defaults
        fig.suptitle(f"\
            Settings (dev-mode): \n\
            - split-mode: {split} \n\
            - threshold: {threshold} \n\
            - cbar: \n\
                - vmax: {settings['vmax']} \n\
                - vmin: {settings['vmin']} \n\
            - linewidths: {settings['linewidths']} \n\
            - annot_kws: {settings['annot_kws']} \n\
            - cbar_kws: {settings['cbar_kws']}",
                     fontsize=12,
                     color='gray',
                     x=0.35,
                     y=0.8,
                     ha='left')

    return ax
||
| 251 | |||
| 263 |