|
1
|
|
|
import functools |
|
2
|
|
|
import timeit |
|
3
|
|
|
from collections.abc import Mapping |
|
4
|
|
|
from typing import Union |
|
5
|
|
|
|
|
6
|
|
|
import matplotlib as mpl |
|
7
|
|
|
import networkx as nx |
|
8
|
|
|
import numpy as np |
|
9
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
# --- Original Implementation --- |
|
12
|
|
View Code Duplication |
@functools.lru_cache |
|
|
|
|
|
|
13
|
|
|
def node_color_mapping_numpy(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping: |
|
14
|
|
|
if not graph.nodes: |
|
15
|
|
|
return {} |
|
16
|
|
|
|
|
17
|
|
|
node_type_keys = graph.graph.get('node_types', {}).keys() |
|
18
|
|
|
|
|
19
|
|
|
if len(node_type_keys) > 1 and 'node' in node_type_keys: |
|
20
|
|
|
final_keys = [k for k in node_type_keys if k != 'node'] |
|
21
|
|
|
else: |
|
22
|
|
|
final_keys = list(node_type_keys) |
|
23
|
|
|
|
|
24
|
|
|
type_lookup = {t: i for i, t in enumerate(final_keys)} |
|
25
|
|
|
|
|
26
|
|
|
color_values_ndarray = np.fromiter( |
|
27
|
|
|
(type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes), |
|
|
|
|
|
|
28
|
|
|
dtype=int, |
|
29
|
|
|
count=len(graph), |
|
30
|
|
|
) |
|
31
|
|
|
if len(color_values_ndarray) > 1: |
|
32
|
|
|
low, high = color_values_ndarray.min(), color_values_ndarray.max() |
|
33
|
|
|
else: |
|
34
|
|
|
low = high = 0 |
|
35
|
|
|
|
|
36
|
|
|
norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True) |
|
37
|
|
|
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap) |
|
38
|
|
|
colors = mapper.to_rgba(color_values_ndarray).tolist() |
|
39
|
|
|
|
|
40
|
|
|
color_mapping = dict(zip(graph.nodes, colors)) |
|
41
|
|
|
return color_mapping |
|
42
|
|
|
|
|
43
|
|
|
|
|
44
|
|
|
# --- Alternative Implementation (No NumPy) --- |
|
45
|
|
View Code Duplication |
@functools.lru_cache |
|
|
|
|
|
|
46
|
|
|
def node_color_mapping_pure(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping: |
|
47
|
|
|
if not graph.nodes: |
|
48
|
|
|
return {} |
|
49
|
|
|
|
|
50
|
|
|
node_type_keys = graph.graph.get('node_types', {}).keys() |
|
51
|
|
|
|
|
52
|
|
|
if len(node_type_keys) > 1 and 'node' in node_type_keys: |
|
53
|
|
|
final_keys = [k for k in node_type_keys if k != 'node'] |
|
54
|
|
|
else: |
|
55
|
|
|
final_keys = list(node_type_keys) |
|
56
|
|
|
|
|
57
|
|
|
type_lookup = {t: i for i, t in enumerate(final_keys)} |
|
58
|
|
|
|
|
59
|
|
|
# Generate indices directly |
|
60
|
|
|
color_indices = [type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes] |
|
61
|
|
|
|
|
62
|
|
|
if len(color_indices) > 1: |
|
63
|
|
|
low, high = min(color_indices), max(color_indices) |
|
64
|
|
|
else: |
|
65
|
|
|
low = high = 0 |
|
66
|
|
|
|
|
67
|
|
|
norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True) |
|
68
|
|
|
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap) |
|
69
|
|
|
|
|
70
|
|
|
# mapper.to_rgba accepts a list of values |
|
71
|
|
|
colors = mapper.to_rgba(color_indices).tolist() |
|
72
|
|
|
|
|
73
|
|
|
color_mapping = dict(zip(graph.nodes, colors)) |
|
74
|
|
|
return color_mapping |
|
75
|
|
|
|
|
76
|
|
|
|
|
77
|
|
|
def run_benchmark(): |
|
78
|
|
|
# Setup a dummy graph |
|
79
|
|
|
g = nx.Graph() |
|
80
|
|
|
g.graph['node_types'] = {'user': {}, 'post': {}, 'comment': {}} |
|
81
|
|
|
|
|
82
|
|
|
# Add a reasonable number of nodes to make the benchmark meaningful |
|
83
|
|
|
num_nodes = 10000 |
|
84
|
|
|
for i in range(num_nodes): |
|
85
|
|
|
node_type = 'user' if i % 3 == 0 else ('post' if i % 3 == 1 else 'comment') |
|
86
|
|
|
g.add_node(f"n{i}", type=node_type) |
|
87
|
|
|
|
|
88
|
|
|
# Warmup |
|
89
|
|
|
node_color_mapping_numpy(g) |
|
90
|
|
|
node_color_mapping_pure(g) |
|
91
|
|
|
|
|
92
|
|
|
# Clear cache for fair comparison if we were calling it multiple times, |
|
93
|
|
|
# but here we just want to measure execution time of the logic. |
|
94
|
|
|
# Since lru_cache is on, subsequent calls would be instant. |
|
95
|
|
|
# We need to bypass the wrapper or clear cache. |
|
96
|
|
|
node_color_mapping_numpy.cache_clear() |
|
97
|
|
|
node_color_mapping_pure.cache_clear() |
|
98
|
|
|
|
|
99
|
|
|
print(f"Benchmarking with {num_nodes} nodes...") |
|
100
|
|
|
|
|
101
|
|
|
t_numpy = timeit.timeit(lambda: node_color_mapping_numpy(g), number=1000) |
|
102
|
|
|
print(f"NumPy version: {t_numpy:.4f} seconds (1000 runs)") |
|
103
|
|
|
|
|
104
|
|
|
t_pure = timeit.timeit(lambda: node_color_mapping_pure(g), number=1000) |
|
105
|
|
|
print(f"Pure Python version: {t_pure:.4f} seconds (1000 runs)") |
|
106
|
|
|
|
|
107
|
|
|
diff = (t_pure - t_numpy) / t_numpy * 100 |
|
108
|
|
|
print(f"Difference: {diff:.2f}%") |
|
109
|
|
|
|
|
110
|
|
|
|
|
111
|
|
|
if __name__ == "__main__": |
|
112
|
|
|
run_benchmark() |
|
113
|
|
|
|