Passed
Push — main ( da7c7b...bd22a8 )
by Eran
01:34
created

color_benchmark   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 113
Duplicated Lines 53.1 %

Importance

Changes 0
Metric Value
wmc 16
eloc 68
dl 60
loc 113
rs 10
c 0
b 0
f 0

3 Functions

Rating   Name   Duplication   Size   Complexity  
A node_color_mapping_pure() 30 30 5
B run_benchmark() 0 32 6
B node_color_mapping_numpy() 30 30 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

1
import functools
2
import timeit
3
from collections.abc import Mapping
4
from typing import Union
5
6
import matplotlib as mpl
7
import networkx as nx
8
import numpy as np
9
10
11
# --- Original Implementation ---
12 View Code Duplication
@functools.lru_cache
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
13
def node_color_mapping_numpy(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping:
14
    if not graph.nodes:
15
        return {}
16
17
    node_type_keys = graph.graph.get('node_types', {}).keys()
18
19
    if len(node_type_keys) > 1 and 'node' in node_type_keys:
20
        final_keys = [k for k in node_type_keys if k != 'node']
21
    else:
22
        final_keys = list(node_type_keys)
23
24
    type_lookup = {t: i for i, t in enumerate(final_keys)}
25
26
    color_values_ndarray = np.fromiter(
27
        (type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes),
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable node does not seem to be defined.
Loading history...
28
        dtype=int,
29
        count=len(graph),
30
    )
31
    if len(color_values_ndarray) > 1:
32
        low, high = color_values_ndarray.min(), color_values_ndarray.max()
33
    else:
34
        low = high = 0
35
36
    norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True)
37
    mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
38
    colors = mapper.to_rgba(color_values_ndarray).tolist()
39
40
    color_mapping = dict(zip(graph.nodes, colors))
41
    return color_mapping
42
43
44
# --- Alternative Implementation (No NumPy) ---
45 View Code Duplication
@functools.lru_cache
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
46
def node_color_mapping_pure(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping:
47
    if not graph.nodes:
48
        return {}
49
50
    node_type_keys = graph.graph.get('node_types', {}).keys()
51
52
    if len(node_type_keys) > 1 and 'node' in node_type_keys:
53
        final_keys = [k for k in node_type_keys if k != 'node']
54
    else:
55
        final_keys = list(node_type_keys)
56
57
    type_lookup = {t: i for i, t in enumerate(final_keys)}
58
59
    # Generate indices directly
60
    color_indices = [type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes]
61
62
    if len(color_indices) > 1:
63
        low, high = min(color_indices), max(color_indices)
64
    else:
65
        low = high = 0
66
67
    norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True)
68
    mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
69
70
    # mapper.to_rgba accepts a list of values
71
    colors = mapper.to_rgba(color_indices).tolist()
72
73
    color_mapping = dict(zip(graph.nodes, colors))
74
    return color_mapping
75
76
77
def run_benchmark():
78
    # Setup a dummy graph
79
    g = nx.Graph()
80
    g.graph['node_types'] = {'user': {}, 'post': {}, 'comment': {}}
81
82
    # Add a reasonable number of nodes to make the benchmark meaningful
83
    num_nodes = 10000
84
    for i in range(num_nodes):
85
        node_type = 'user' if i % 3 == 0 else ('post' if i % 3 == 1 else 'comment')
86
        g.add_node(f"n{i}", type=node_type)
87
88
    # Warmup
89
    node_color_mapping_numpy(g)
90
    node_color_mapping_pure(g)
91
92
    # Clear cache for fair comparison if we were calling it multiple times, 
93
    # but here we just want to measure execution time of the logic.
94
    # Since lru_cache is on, subsequent calls would be instant.
95
    # We need to bypass the wrapper or clear cache.
96
    node_color_mapping_numpy.cache_clear()
97
    node_color_mapping_pure.cache_clear()
98
99
    print(f"Benchmarking with {num_nodes} nodes...")
100
101
    t_numpy = timeit.timeit(lambda: node_color_mapping_numpy(g), number=1000)
102
    print(f"NumPy version: {t_numpy:.4f} seconds (1000 runs)")
103
104
    t_pure = timeit.timeit(lambda: node_color_mapping_pure(g), number=1000)
105
    print(f"Pure Python version: {t_pure:.4f} seconds (1000 runs)")
106
107
    diff = (t_pure - t_numpy) / t_numpy * 100
108
    print(f"Difference: {diff:.2f}%")
109
110
111
if __name__ == "__main__":
112
    run_benchmark()
113