|
1
|
|
|
import requests |
|
2
|
|
|
import sys |
|
3
|
|
|
import socket |
|
4
|
|
|
import os |
|
5
|
|
|
import datetime |
|
6
|
|
|
from functools import reduce |
|
7
|
|
|
import time |
|
8
|
|
|
from random import randint |
|
9
|
|
|
DEBUG=False |
|
10
|
|
|
import gc |
|
11
|
|
|
def url_maker_following(Name, page_number):
    """
    Build the URL of one page of a GitHub user's "following" tab.

    :param Name: GitHub username
    :param page_number: page number of the following page
    :type Name: str
    :type page_number: int
    :return: github following url as string
    """
    pieces = [
        "https://github.com/",
        Name,
        "?page=",
        str(page_number),
        "&tab=following",
    ]
    return "".join(pieces)
|
21
|
|
|
def url_maker_follower(Name, page_number):
    """
    Build the URL of one page of a GitHub user's "followers" tab.

    :param Name: GitHub username
    :param page_number: page number of the follower page
    :type Name: str
    :type page_number: int
    :return: github follower url as string
    """
    profile_url = "https://github.com/" + Name
    query = "?page=" + str(page_number) + "&tab=followers"
    return profile_url + query
|
31
|
|
|
|
|
32
|
|
|
def user_list_gen(input_string, follower_name):
    """
    Extract usernames from raw html.

    Every ``alt="@<name>"`` attribute found in ``input_string`` contributes
    ``<name>`` to the result, in order of appearance and including
    duplicates. Entries equal to ``follower_name`` are left out.

    :param input_string: raw input html
    :param follower_name: username to exclude from the result
    :type input_string: str
    :type follower_name: str
    :return: user_list as list
    """
    marker = 'alt="@'
    user_list = []
    position = input_string.find(marker)
    while position != -1:
        name_start = position + len(marker)
        name_end = input_string.find('"', name_start)
        if name_end == -1:
            # Unterminated attribute: there is no closing quote anywhere
            # after this point, so no further complete name can exist.
            break
        user_name = input_string[name_start:name_end]
        if user_name != follower_name:
            user_list.append(user_name)
        # A name cannot contain the marker (the marker itself holds a quote),
        # so resuming at the closing quote cannot skip a match.
        position = input_string.find(marker, name_end)
    return user_list
|
51
|
|
|
|
|
52
|
|
|
|
|
53
|
|
|
def get_html(url):
    """
    Download a page and return its body as text.

    The call first sleeps for ``create_random_sleep()`` seconds, then checks
    connectivity with ``internet()``. The process is terminated through
    ``sys.exit()`` (after printing a message) when there is no connection or
    when the downloaded text contains ``"Not Found"``.

    :param url: url
    :type url: str
    :return: html data
    """
    # Pause before every request.
    time.sleep(create_random_sleep())
    if not internet():
        print("Error In Internet")
        sys.exit()
    # The context manager closes the session even when the request raises.
    with requests.session() as new_session:
        new_session.cookies.clear()
        raw_data = new_session.get(url).text
    if "Not Found" in raw_data:
        print("Invalid Github User")
        sys.exit()
    return raw_data
|
74
|
|
|
|
|
75
|
|
|
|
|
76
|
|
|
def end_check(input_string):
    """
    Tell whether a page carries the end-of-list marker.

    :param input_string: raw html
    :type input_string: str
    :return: True when the text "reached the end" occurs in the page, False otherwise
    """
    return "reached the end" in input_string
|
87
|
|
|
def follower_list_gen(follower_name):
    """
    Collect the followers of a user by walking the follower pages.

    Pages are fetched starting at page 1 until ``end_check`` reports the
    end marker; the names extracted from each earlier page are accumulated.

    :param follower_name: username
    :type follower_name: str
    :return: username follower list
    """
    collected = []
    current_page = 1
    page_html = get_html(url_maker_follower(follower_name, current_page))
    while not end_check(page_html):
        collected += user_list_gen(page_html, follower_name)
        current_page += 1
        page_html = get_html(url_maker_follower(follower_name, current_page))
    return collected
|
105
|
|
|
def following_list_gen(follower_name):
    """
    Collect the accounts a user follows by walking the following pages.

    Pages are fetched starting at page 1 until ``end_check`` reports the
    end marker; the names extracted from each earlier page are accumulated.

    :param follower_name: username
    :type follower_name: str
    :return: username following list
    """
    collected = []
    current_page = 1
    page_html = get_html(url_maker_following(follower_name, current_page))
    while not end_check(page_html):
        collected += user_list_gen(page_html, follower_name)
        current_page += 1
        page_html = get_html(url_maker_following(follower_name, current_page))
    return collected
|
123
|
|
|
|
|
124
|
|
|
def error_log(msg):
    """
    Append a timestamped message to ``log/error_log.txt``.

    The ``log`` directory is located in the current working directory and
    is created when it does not exist yet. Each entry has the form
    ``<timestamp> --> <msg>`` followed by a newline.

    :param msg: error message (converted with ``str``)
    :type msg: str
    :return: None
    """
    log_dir = os.path.join(os.getcwd(), "log")
    # exist_ok avoids the check-then-create race of listdir() + mkdir().
    os.makedirs(log_dir, exist_ok=True)
    entry = str(datetime.datetime.now()) + " --> " + str(msg) + "\n"
    # The with-block closes the file even when the write fails.
    with open(os.path.join(log_dir, "error_log.txt"), "a") as log_file:
        log_file.write(entry)
|
135
|
|
|
|
|
136
|
|
|
def internet(host="8.8.8.8", port=53, timeout=3):
    """
    Check the internet connection by opening a TCP connection.

    Note that this sets the process-wide default socket timeout through
    ``socket.setdefaulttimeout(timeout)``. Any exception raised while
    connecting is written to the error log and reported as ``False``.

    :param host: the host to connect to
    :param port: the port to connect to
    :param timeout: socket timeout in seconds
    :type host: str
    :type port: int
    :type timeout: int
    :return bool: True if the connection could be established
    >>> internet() # if there is stable internet connection
    True
    >>> internet() # if there is no stable internet connection
    False
    """
    try:
        socket.setdefaulttimeout(timeout)
        # The with-block closes the probe socket on success and on failure.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.connect((host, port))
        return True
    except Exception as ex:
        error_log(str(ex))
        return False
|
158
|
|
|
|
|
159
|
|
|
def create_random_sleep(index=1, min_time=1, max_time=3):
    """
    Choose how long to sleep before the next request.

    ``index == 0`` yields a fixed 5 seconds; any other index yields a random
    integer from ``randint(min_time, max_time)`` (both bounds included).
    When the module-level ``DEBUG`` flag is set, the chosen delay and a
    separator line are printed.

    :param index: index to determine first page and messages (index = 0 is for first page)
    :param min_time: minimum time of sleep
    :param max_time: maximum time of sleep
    :type index: int
    :type min_time: int
    :type max_time: int
    :return: time of sleep as integer (a number between min and max)
    """
    if index == 0:
        time_sleep = 5
        stage = "first"
    else:
        time_sleep = randint(min_time, max_time)
        stage = "next"
    if DEBUG:
        print("Wait " + str(time_sleep) + " sec for " + stage + " search . . .")
        print_line(70, "*")
    return time_sleep
|
181
|
|
|
|
|
182
|
|
|
def print_line(number=30, char="-"):
    """
    Print a line made of ``char`` repeated ``number`` times.

    A ``number`` of zero or less prints an empty line.

    :param number: number of items in each line
    :param char: each char of line
    :type number: int
    :type char: str
    :return: None
    """
    # String repetition replaces the character-by-character concatenation loop.
    print(char * number)
|
193
|
|
|
|
|
194
|
|
|
|
|
195
|
|
|
def follow(username):
    """
    Collect the follower and following lists of a user and save them.

    The followers are written to ``<username>_follower.log`` and the
    followed accounts to ``<username>_following.log`` (one name per line,
    in the current working directory). Progress is printed to stdout.

    :param username: username
    :type username: str
    :return: (list_1,list_2) as tuple, i.e. (followers, following)
    """
    print("Collecting Follower Information ...")
    print_line(70, "*")
    list_1 = follower_list_gen(username)
    follower_path = username + "_follower.log"
    # with-blocks make sure the log files are closed even if a write fails.
    with open(follower_path, "w") as follower_file:
        print(str(len(list_1)) + " Followers --> " + follower_path)
        print_line(70, "*")
        follower_file.write("\n".join(list_1))

    print("Collecting Following Information ...")
    print_line(70, "*")
    list_2 = following_list_gen(username)
    following_path = username + "_following.log"
    with open(following_path, "w") as following_file:
        print(str(len(list_2)) + " Following --> " + following_path)
        print_line(70, "*")
        following_file.write("\n".join(list_2))
    return (list_1, list_2)
|
219
|
|
|
|
|
220
|
|
|
def dif(list_1, list_2, username=None):
    """
    Write the differences between the follower and following lists.

    ``<username>_dif1.log`` receives the accounts that are followed but do
    not follow back; ``<username>_dif2.log`` receives the followers that are
    not followed. Names are written one per line in sorted order.

    :param list_1: follower list
    :param list_2: following list
    :param username: name used as prefix of the output files; when omitted,
        the module-level ``username`` variable is used
    :type list_1: list
    :type list_2: list
    :type username: str
    :return: None
    :raises NameError: if ``username`` is omitted and no module-level
        ``username`` exists
    """
    if username is None:
        # Earlier versions read the name from a module-level variable set by
        # the script entry point; keep that as the fallback for old callers.
        try:
            username = globals()["username"]
        except KeyError:
            raise NameError("name 'username' is not defined") from None

    followers = set(list_1)
    following = set(list_2)
    reports = (
        ("_dif1.log", " Following - Not Follower --> ", following - followers),
        ("_dif2.log", " Follower - Not Following --> ", followers - following),
    )
    for suffix, label, names in reports:
        # Sorting makes the file contents reproducible between runs.
        dif_list = sorted(names)
        path = username + suffix
        with open(path, "w") as report_file:
            print(str(len(dif_list)) + label + path)
            print_line(70, "*")
            report_file.write("\n".join(dif_list))
|
241
|
|
|
if __name__ == "__main__":
    # Script entry point: ask for a username, build the follower/following
    # logs and their differences, then report the elapsed time.
    time_1 = time.perf_counter()
    # dif() looks up the module-level name ``username``, so this variable
    # must keep that exact name.
    username = input("Please Enter Your Github Username : ")
    followers, following = follow(username)
    dif(followers, following)
    elapsed = time.perf_counter() - time_1
    print("Data Generated In " + str(elapsed) + " sec")
    print("Log Files Are Ready --> " + os.getcwd())
    gc.collect()
|
250
|
|
|
|
|
251
|
|
|
|
|
252
|
|
|
|
|
253
|
|
|
|
|
254
|
|
|
|
|
255
|
|
|
|
|
256
|
|
|
|
|
257
|
|
|
|
|
258
|
|
|
|
|
259
|
|
|
|
|
260
|
|
|
|
|
261
|
|
|
|
|
262
|
|
|
|