|
1
|
|
|
import random |
|
2
|
|
|
from multivalued_dict_package import * #lgtm [py/polluting-import] |
|
3
|
|
|
from shuffle_graph_package import * #lgtm [py/polluting-import] |
|
4
|
|
|
from count_dict_package import count_dict |
|
5
|
|
|
from networkx.classes.graph import Graph #lgtm [py/unused-import] |
|
6
|
|
|
from networkx.classes.digraph import DiGraph |
|
7
|
|
|
from networkx.classes.multigraph import MultiGraph #lgtm [py/unused-import] |
|
8
|
|
|
from networkx.classes.multidigraph import MultiDiGraph #lgtm [py/unused-import] |
|
9
|
|
|
|
|
10
|
|
|
__all__ = ['ASLPAw'] |
|
11
|
|
|
|
|
12
|
|
|
def __ASLPAw_networkx(data_graph: 'graph', Repeat_T: int, seed: int) -> DiGraph: |
|
13
|
|
|
def __remove_low_frequency_label(community_label_queues_for_nodes: multivalued_dict) -> DiGraph: |
|
14
|
|
|
from sklearn.ensemble import IsolationForest |
|
15
|
|
|
digraph_of_node_labels_and_frequencies = DiGraph() |
|
16
|
|
|
for graph_of_node, label_list_of_nodes in community_label_queues_for_nodes.items(): |
|
17
|
|
|
digraph_of_node_labels_and_frequencies.add_node(graph_of_node) |
|
18
|
|
|
label_set = set(label_list_of_nodes) |
|
19
|
|
|
dict_of_frequency_of_label = dict(sorted([(label_list_of_nodes.count(label_item), label_item) for label_item in label_set], key = lambda frequency_and_label: frequency_and_label[0], reverse = True)) |
|
20
|
|
|
dict_of_sn_and_frequency = dict([(sequence_number, frequency_of_label) for sequence_number, frequency_of_label in enumerate(dict_of_frequency_of_label.keys(), 1)]) |
|
21
|
|
|
list_of_mapping_points = [] |
|
22
|
|
|
for sequence_number, frequency_of_label in dict_of_sn_and_frequency.items(): |
|
23
|
|
|
list_of_mapping_points.extend([[sequence_number]] * frequency_of_label) |
|
24
|
|
|
clf = IsolationForest(n_estimators = 120, contamination = 'auto', behaviour = 'new') |
|
25
|
|
|
clf.fit(list_of_mapping_points) |
|
26
|
|
|
for sequence_number, frequency_of_label in dict_of_sn_and_frequency.items(): |
|
27
|
|
|
if clf.predict([[sequence_number]])[0] == 1: |
|
28
|
|
|
label_item = dict_of_frequency_of_label.__getitem__(frequency_of_label) |
|
29
|
|
|
digraph_of_node_labels_and_frequencies.add_edge(graph_of_node, label_item, weight = frequency_of_label) |
|
30
|
|
|
return digraph_of_node_labels_and_frequencies |
|
31
|
|
|
|
|
32
|
|
|
community_label_queues_for_nodes = multivalued_dict([[node_of_graph, node_of_graph] for node_of_graph in data_graph.nodes]) |
|
33
|
|
|
|
|
34
|
|
|
random.seed(seed) |
|
35
|
|
|
|
|
36
|
|
|
shuffle_number = calculate_number_of_shuffles_required_under_default_random_function(data_graph.number_of_nodes()) |
|
37
|
|
|
|
|
38
|
|
|
for _t in range(Repeat_T): |
|
39
|
|
|
data_graph = shuffle_graph(data_graph, shuffle_number, seed) |
|
40
|
|
|
for data_graph_node, dict_of_adjvex in data_graph.adjacency(): |
|
41
|
|
|
weight_of_community_label_for_adjvex = count_dict() |
|
42
|
|
|
for adjvex in dict_of_adjvex.keys(): |
|
43
|
|
|
if data_graph.is_multigraph(): |
|
44
|
|
|
weight_of_edge = sum(value_of_edge.get('weight', 1) for value_of_edge in dict_of_adjvex.__getitem__(adjvex).values()) |
|
45
|
|
|
else: |
|
46
|
|
|
weight_of_edge = dict_of_adjvex.__getitem__(adjvex).get('weight', 1) |
|
47
|
|
|
community_label_for_adjvex = random.choice(community_label_queues_for_nodes.__getitem__(adjvex)) |
|
48
|
|
|
weight_of_community_label_for_adjvex[community_label_for_adjvex] += weight_of_edge |
|
49
|
|
|
community_label_for_node = max(weight_of_community_label_for_adjvex, key = weight_of_community_label_for_adjvex.__getitem__, default = data_graph_node) |
|
50
|
|
|
community_label_queues_for_nodes.update({data_graph_node: community_label_for_node}) |
|
51
|
|
|
|
|
52
|
|
|
digraph_of_node_labels_and_frequencies = __remove_low_frequency_label(community_label_queues_for_nodes) |
|
53
|
|
|
return digraph_of_node_labels_and_frequencies |
|
54
|
|
|
|
|
55
|
|
|
def ASLPAw(data_graph: 'graph', Repeat_T: int = 30, seed: int = None, graph_package = 'NetworkX') -> DiGraph: |
|
56
|
|
|
''' |
|
57
|
|
|
Returns a graph of the edges of each node with its own community tag node. |
|
58
|
|
|
|
|
59
|
|
|
ASLPAw can be used for disjoint and overlapping community detection and works on weighted/unweighted and directed/undirected networks. ASLPAw is adaptive with virtually no configuration parameters. |
|
60
|
|
|
|
|
61
|
|
|
Parameters |
|
62
|
|
|
---------- |
|
63
|
|
|
data_graph : graph |
|
64
|
|
|
A graph object. According to the package selected by the parameter graph_package, "data_graph" can accept graph objects of the corresponding type. However, any package you choose can accept a "NetworkX" object. |
|
65
|
|
|
|
|
66
|
|
|
Repeat_T : integer |
|
67
|
|
|
ASLPAw is an iterative process, this parameter sets the number of iterations. |
|
68
|
|
|
|
|
69
|
|
|
seed : integer, random_state, or None (default) |
|
70
|
|
|
Indicator of random number generation state. |
|
71
|
|
|
|
|
72
|
|
|
Returns |
|
73
|
|
|
------- |
|
74
|
|
|
communities : DirectedGraph |
|
75
|
|
|
Each node uses a community discovery map with a weighted edge pointing to its own community tag node. |
|
76
|
|
|
|
|
77
|
|
|
Examples |
|
78
|
|
|
-------- |
|
79
|
|
|
>>> from networkx.generators.community import relaxed_caveman_graph |
|
80
|
|
|
>>> data_graph = relaxed_caveman_graph(3, 6, 0.22, seed = 65535) |
|
81
|
|
|
>>> ASLPAw(data_graph, seed=65535).adj |
|
82
|
|
|
AdjacencyView({0: {1: {'weight': 30}}, 1: {6: {'weight': 15}, 1: {'weight': 14}}, 6: {6: {'weight': 31}}, 2: {1: {'weight': 30}}, 3: {1: {'weight': 29}}, 4: {1: {'weight': 30}}, 5: {1: {'weight': 30}}, 7: {6: {'weight': 30}}, 8: {6: {'weight': 29}}, 9: {6: {'weight': 29}}, 10: {6: {'weight': 25}}, 11: {6: {'weight': 28}}, 12: {15: {'weight': 19}}, 15: {15: {'weight': 24}}, 13: {15: {'weight': 22}}, 14: {15: {'weight': 22}}, 16: {15: {'weight': 19}}, 17: {15: {'weight': 19}}}) |
|
83
|
|
|
|
|
84
|
|
|
>>> data_graph = relaxed_caveman_graph(3, 6, 0.39, seed = 65535) |
|
85
|
|
|
>>> ASLPAw(data_graph, seed=65535).adj |
|
86
|
|
|
AdjacencyView({0: {3: {'weight': 25}}, 3: {3: {'weight': 27}}, 1: {3: {'weight': 26}}, 2: {3: {'weight': 28}}, 4: {3: {'weight': 29}}, 5: {3: {'weight': 29}}, 6: {6: {'weight': 30}}, 7: {6: {'weight': 30}}, 8: {6: {'weight': 21}}, 9: {6: {'weight': 27}}, 10: {3: {'weight': 20}}, 11: {6: {'weight': 27}}, 12: {15: {'weight': 16}, 6: {'weight': 13}}, 15: {}, 13: {6: {'weight': 19}}, 14: {6: {'weight': 20}}, 16: {15: {'weight': 17}, 6: {'weight': 12}}, 17: {15: {'weight': 18}, 6: {'weight': 12}}}) |
|
87
|
|
|
''' |
|
88
|
|
|
|
|
89
|
|
|
if graph_package == 'NetworkX': |
|
90
|
|
|
return __ASLPAw_networkx(data_graph, Repeat_T, seed) |
|
91
|
|
|
elif graph_package == 'SNAP': |
|
92
|
|
|
pass |
|
93
|
|
|
elif graph_package == 'graph-tool': |
|
94
|
|
|
pass |
|
95
|
|
|
elif graph_package == 'igraph': |
|
96
|
|
|
pass |
|
97
|
|
|
else: |
|
98
|
|
|
raise ValueError(f'The value "{data_graph}" of the parameter "data_graph" is not one of "NetworkX", "SNAP", "graph-tool" or "igraph"!') |
|
99
|
|
|
|