color_benchmark - Code Metrics - Inspection of "docs: add performance optimization guide using Net..." - erivlis/graphinate - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( da7c7b...bd22a8 )

by Eran

created 2026-01-18 11:31 UTC

color_benchmark A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	113
Duplicated Lines	53.1 %

Importance

Changes

Metric	Value
wmc	16
eloc	68
dl	60
loc	113
rs	10
c	0
b	0
f	0

3 Functions

Rating	Name	Duplication	Size	Complexity
A	node_color_mapping_pure()	30	30	5
B	run_benchmark()	0	32	6
B	node_color_mapping_numpy()	30	30	5

How to fix Duplicated Code

import functools
import timeit
from collections.abc import Mapping
from typing import Union

import matplotlib as mpl
import networkx as nx
import numpy as np


# --- Original Implementation ---
@functools.lru_cache

def node_color_mapping_numpy(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping:
    if not graph.nodes:
        return {}

    node_type_keys = graph.graph.get('node_types', {}).keys()

    if len(node_type_keys) > 1 and 'node' in node_type_keys:
        final_keys = [k for k in node_type_keys if k != 'node']
    else:
        final_keys = list(node_type_keys)

    type_lookup = {t: i for i, t in enumerate(final_keys)}

    color_values_ndarray = np.fromiter(
        (type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes),

        dtype=int,
        count=len(graph),
    )
    if len(color_values_ndarray) > 1:
        low, high = color_values_ndarray.min(), color_values_ndarray.max()
    else:
        low = high = 0

    norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True)
    mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
    colors = mapper.to_rgba(color_values_ndarray).tolist()

    color_mapping = dict(zip(graph.nodes, colors))
    return color_mapping


# --- Alternative Implementation (No NumPy) ---
@functools.lru_cache

def node_color_mapping_pure(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping:
    if not graph.nodes:
        return {}

    node_type_keys = graph.graph.get('node_types', {}).keys()

    if len(node_type_keys) > 1 and 'node' in node_type_keys:
        final_keys = [k for k in node_type_keys if k != 'node']
    else:
        final_keys = list(node_type_keys)

    type_lookup = {t: i for i, t in enumerate(final_keys)}

    # Generate indices directly
    color_indices = [type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes]

    if len(color_indices) > 1:
        low, high = min(color_indices), max(color_indices)
    else:
        low = high = 0

    norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True)
    mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)

    # mapper.to_rgba accepts a list of values
    colors = mapper.to_rgba(color_indices).tolist()

    color_mapping = dict(zip(graph.nodes, colors))
    return color_mapping


def run_benchmark():
    # Setup a dummy graph
    g = nx.Graph()
    g.graph['node_types'] = {'user': {}, 'post': {}, 'comment': {}}

    # Add a reasonable number of nodes to make the benchmark meaningful
    num_nodes = 10000
    for i in range(num_nodes):
        node_type = 'user' if i % 3 == 0 else ('post' if i % 3 == 1 else 'comment')
        g.add_node(f"n{i}", type=node_type)

    # Warmup
    node_color_mapping_numpy(g)
    node_color_mapping_pure(g)

    # Clear cache for fair comparison if we were calling it multiple times, 
    # but here we just want to measure execution time of the logic.
    # Since lru_cache is on, subsequent calls would be instant.
    # We need to bypass the wrapper or clear cache.
    node_color_mapping_numpy.cache_clear()
    node_color_mapping_pure.cache_clear()

    print(f"Benchmarking with {num_nodes} nodes...")

    t_numpy = timeit.timeit(lambda: node_color_mapping_numpy(g), number=1000)
    print(f"NumPy version: {t_numpy:.4f} seconds (1000 runs)")

    t_pure = timeit.timeit(lambda: node_color_mapping_pure(g), number=1000)
    print(f"Pure Python version: {t_pure:.4f} seconds (1000 runs)")

    diff = (t_pure - t_numpy) / t_numpy * 100
    print(f"Difference: {diff:.2f}%")


if __name__ == "__main__":
    run_benchmark()


1		import functools
2		import timeit
3		from collections.abc import Mapping
4		from typing import Union
5
6		import matplotlib as mpl
7		import networkx as nx
8		import numpy as np
9
10
11		# --- Original Implementation ---
12	View Code Duplication	@functools.lru_cache
		0 ignored issues – show Duplication introduced 2026-01-18 11:32 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
13		def node_color_mapping_numpy(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping:
14		if not graph.nodes:
15		return {}
16
17		node_type_keys = graph.graph.get('node_types', {}).keys()
18
19		if len(node_type_keys) > 1 and 'node' in node_type_keys:
20		final_keys = [k for k in node_type_keys if k != 'node']
21		else:
22		final_keys = list(node_type_keys)
23
24		type_lookup = {t: i for i, t in enumerate(final_keys)}
25
26		color_values_ndarray = np.fromiter(
27		(type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes),
		0 ignored issues – show Comprehensibility Best Practice introduced 2026-01-18 11:32 UTC by Report Bug Copy Issue Report The variable `node` does not seem to be defined. Loading history...
28		dtype=int,
29		count=len(graph),
30		)
31		if len(color_values_ndarray) > 1:
32		low, high = color_values_ndarray.min(), color_values_ndarray.max()
33		else:
34		low = high = 0
35
36		norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True)
37		mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
38		colors = mapper.to_rgba(color_values_ndarray).tolist()
39
40		color_mapping = dict(zip(graph.nodes, colors))
41		return color_mapping
42
43
44		# --- Alternative Implementation (No NumPy) ---
45	View Code Duplication	@functools.lru_cache
		0 ignored issues – show Duplication introduced 2026-01-18 11:32 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
46		def node_color_mapping_pure(graph: nx.Graph, cmap: Union[str, mpl.colors.Colormap] = "tab20") -> Mapping:
47		if not graph.nodes:
48		return {}
49
50		node_type_keys = graph.graph.get('node_types', {}).keys()
51
52		if len(node_type_keys) > 1 and 'node' in node_type_keys:
53		final_keys = [k for k in node_type_keys if k != 'node']
54		else:
55		final_keys = list(node_type_keys)
56
57		type_lookup = {t: i for i, t in enumerate(final_keys)}
58
59		# Generate indices directly
60		color_indices = [type_lookup.get(graph.nodes[node].get('type'), 0) for node in graph.nodes]
61
62		if len(color_indices) > 1:
63		low, high = min(color_indices), max(color_indices)
64		else:
65		low = high = 0
66
67		norm = mpl.colors.Normalize(vmin=low, vmax=high, clip=True)
68		mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
69
70		# mapper.to_rgba accepts a list of values
71		colors = mapper.to_rgba(color_indices).tolist()
72
73		color_mapping = dict(zip(graph.nodes, colors))
74		return color_mapping
75
76
77		def run_benchmark():
78		# Setup a dummy graph
79		g = nx.Graph()
80		g.graph['node_types'] = {'user': {}, 'post': {}, 'comment': {}}
81
82		# Add a reasonable number of nodes to make the benchmark meaningful
83		num_nodes = 10000
84		for i in range(num_nodes):
85		node_type = 'user' if i % 3 == 0 else ('post' if i % 3 == 1 else 'comment')
86		g.add_node(f"n{i}", type=node_type)
87
88		# Warmup
89		node_color_mapping_numpy(g)
90		node_color_mapping_pure(g)
91
92		# Clear cache for fair comparison if we were calling it multiple times,
93		# but here we just want to measure execution time of the logic.
94		# Since lru_cache is on, subsequent calls would be instant.
95		# We need to bypass the wrapper or clear cache.
96		node_color_mapping_numpy.cache_clear()
97		node_color_mapping_pure.cache_clear()
98
99		print(f"Benchmarking with {num_nodes} nodes...")
100
101		t_numpy = timeit.timeit(lambda: node_color_mapping_numpy(g), number=1000)
102		print(f"NumPy version: {t_numpy:.4f} seconds (1000 runs)")
103
104		t_pure = timeit.timeit(lambda: node_color_mapping_pure(g), number=1000)
105		print(f"Pure Python version: {t_pure:.4f} seconds (1000 runs)")
106
107		diff = (t_pure - t_numpy) / t_numpy * 100
108		print(f"Difference: {diff:.2f}%")
109
110
111		if __name__ == "__main__":
112		run_benchmark()
113

erivlis / graphinate

Push — main ( da7c7b...bd22a8 )

color_benchmark A

Complexity

Size/Duplication

Importance

3 Functions

How to fix Duplicated Code

Duplicated Code

Duplication Side-by-Side

Filter issues like