1
|
|
|
#!/usr/bin/python |
2
|
|
|
|
3
|
|
|
from itertools import groupby |
4
|
|
|
from operator import itemgetter |
5
|
|
|
import sys |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
def read_mapper_output(input_file, separator='\t'):
    '''
    Lazily parse mapper output lines into their key/value fields.

    Trailing whitespace is removed from every line, and the remainder is
    split on the first occurrence of ``separator`` only, so the value part
    may itself contain the separator.

    :param input_file: iterable of text lines (for example ``sys.stdin``)
    :param separator: string placed between the key and the value
    :type separator: str
    :return: generator yielding one list per line -- ``[key, value]``, or a
             single-element list when the line contains no separator
    '''
    for raw_line in input_file:
        stripped = raw_line.rstrip()
        fields = stripped.split(separator, 1)
        yield fields
19
|
|
|
|
20
|
|
|
def sort(input_list):
    '''
    Return the items of ``input_list`` ordered from longest to shortest.

    The caller's list is left untouched; a new list is returned. Items of
    equal length keep their original relative order, because ``sorted`` is
    stable even when ``reverse=True``.

    :param input_list: list of lists (any sized items work)
    :type input_list: list
    :return: new list holding the same items, longest first
    '''
    # The built-in stable sort replaces the former hand-written selection
    # sort, which popped items out of (and therefore emptied) the argument.
    return sorted(input_list, key=len, reverse=True)
39
|
|
|
|
40
|
|
|
def main(separator='\t'):
    '''
    Reduce mapper output read from ``sys.stdin`` into groups of anagrams.

    Lines are parsed with ``read_mapper_output``; the first field is the
    grouping key and the second is the word. The distinct words seen for
    each run of consecutive equal keys are collected into one list, the
    lists are ordered with ``sort`` and each one is printed as
    ``<size>\\t<list>``.

    :param separator: separator between the key and the word on each line
    :type separator: str
    :return: None
    '''
    data = read_mapper_output(sys.stdin, separator=separator)
    final_list = []
    # groupby only merges adjacent rows, so rows sharing a key are assumed
    # to arrive consecutively on stdin.
    for _key, group in groupby(data, itemgetter(0)):
        # A set drops duplicate words; the order inside each group is
        # therefore whatever order the set iterates in.
        anagram_list = list(set(anagram for _, anagram in group))
        final_list.append(anagram_list)
    for item in sort(final_list):
        # Parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print("%s\t%s" % (len(item), item))
57
|
|
|
|
58
|
|
|
|
59
|
|
|
# Run the reducer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
61
|
|
|
|