1
|
|
|
import requests |
2
|
|
|
import sys |
3
|
|
|
import socket |
4
|
|
|
import os |
5
|
|
|
import datetime |
6
|
|
|
from functools import reduce |
7
|
|
|
import time |
8
|
|
|
from random import randint |
9
|
|
|
DEBUG=False |
10
|
|
|
import gc |
11
|
|
|
|
12
|
|
|
|
13
|
|
|
def zero_insert(input_string):
    '''
    Left-pad a one-character string with "0"; return any other string unchanged.

    :param input_string: input digit(s) as a string
    :type input_string: str
    :return: "0" + input_string when its length is exactly 1, otherwise input_string
    '''
    return "0" + input_string if len(input_string) == 1 else input_string
23
|
|
|
|
24
|
|
|
def time_convert(input_string):
    '''
    Turn a duration in seconds into a "DD days, HH hour, MM minutes, SS seconds" string.

    :param input_string: duration in seconds (anything float() accepts)
    :type input_string: str
    :return: formatted duration, each field passed through zero_insert()
    '''
    total_seconds = float(input_string)
    # Peel off one unit at a time: whole minutes, then whole hours, then whole days.
    whole_minutes = total_seconds // 60
    seconds = int(total_seconds - whole_minutes * 60)
    whole_hours = whole_minutes // 60
    minutes = int(whole_minutes - whole_hours * 60)
    days = int(whole_hours // 24)
    hours = int(whole_hours - days * 24)
    fields = (
        (days, " days, "),
        (hours, " hour, "),
        (minutes, " minutes, "),
        (seconds, " seconds"),
    )
    return "".join(zero_insert(str(value)) + label for value, label in fields)
39
|
|
|
|
40
|
|
|
def url_maker_following(Name,page_number):
    '''
    Build the URL of one page of a user's GitHub "following" tab.

    :param Name: username
    :param page_number: page number of the following tab
    :type Name: str
    :type page_number: int
    :return: following-page url as string
    '''
    pieces = ["https://github.com/", Name, "?page=", str(page_number), "&tab=following"]
    return "".join(pieces)
50
|
|
|
def url_maker_follower(Name,page_number):
    '''
    Build the URL of one page of a user's GitHub "followers" tab.

    :param Name: username
    :param page_number: page number of the followers tab
    :type Name: str
    :type page_number: int
    :return: follower-page url as string
    '''
    pieces = ["https://github.com/", Name, "?page=", str(page_number), "&tab=followers"]
    return "".join(pieces)
60
|
|
|
|
61
|
|
|
def user_list_gen(input_string,follower_name):
    '''
    Extract usernames from raw html.

    Every occurrence of the marker alt="@ is located and the text between it
    and the next double quote is taken as a username. Names equal to
    follower_name are left out.

    :param input_string: raw input html
    :param follower_name: username to exclude from the result
    :type input_string: str
    :type follower_name: str
    :return: usernames in order of appearance (duplicates are kept)
    '''
    marker = 'alt="@'
    user_list = []
    # Start at offset 0 so a marker at the very beginning of the text is not skipped.
    position = input_string.find(marker)
    while position != -1:
        name_start = position + len(marker)
        # Search in place instead of slicing the remainder on every iteration.
        name_end = input_string.find('"', name_start)
        if name_end == -1:
            # Unterminated attribute: nothing trustworthy is left to extract.
            break
        user_name = input_string[name_start:name_end]
        if user_name != follower_name:
            user_list.append(user_name)
        position = input_string.find(marker, name_end)
    return user_list
80
|
|
|
|
81
|
|
|
|
82
|
|
|
def get_html(url):
    '''
    Download a page and return its body as text.

    The call first sleeps for create_random_sleep() seconds, then checks
    connectivity with internet() before issuing the request.

    :param url: url
    :type url: str
    :return: html data
    :raises SystemExit: when internet() reports no connection, or when the
        downloaded text contains "Not Found" (a message is printed first)
    '''
    time.sleep(create_random_sleep())
    if not internet():
        print("Error In Internet")
        sys.exit()
    # The context manager closes the session even when the request raises.
    with requests.session() as new_session:
        new_session.cookies.clear()
        raw_data = new_session.get(url).text
    if "Not Found" in raw_data:
        print("Invalid Github User")
        sys.exit()
    return raw_data
103
|
|
|
|
104
|
|
|
|
105
|
|
|
def end_check(input_string):
    '''
    Tell whether the page text contains the "reached the end" marker.

    :param input_string: raw html
    :type input_string: str
    :return: True when the marker is present, otherwise False
    '''
    return "reached the end" in input_string
116
|
|
|
def follower_list_gen(follower_name):
    '''
    Collect the followers of a user by walking the follower pages from page 1.

    Pages are fetched one after another until end_check() reports the end
    marker; that last page contributes no names.

    :param follower_name: username
    :type follower_name: str
    :return: username follower list
    '''
    collected = []
    page_number = 1
    page_html = get_html(url_maker_follower(follower_name, page_number))
    while not end_check(page_html):
        collected += user_list_gen(page_html, follower_name)
        page_number += 1
        page_html = get_html(url_maker_follower(follower_name, page_number))
    return collected
134
|
|
|
def following_list_gen(follower_name):
    '''
    Collect the accounts a user follows by walking the following pages from page 1.

    Pages are fetched one after another until end_check() reports the end
    marker; that last page contributes no names.

    :param follower_name: username
    :type follower_name: str
    :return: username following list
    '''
    collected = []
    page_number = 1
    page_html = get_html(url_maker_following(follower_name, page_number))
    while not end_check(page_html):
        collected += user_list_gen(page_html, follower_name)
        page_number += 1
        page_html = get_html(url_maker_following(follower_name, page_number))
    return collected
152
|
|
|
|
153
|
|
|
def error_log(msg):
    """
    Append a timestamped line to log/error_log.txt under the current working directory.

    The "log" directory is created when it does not exist yet.

    :param msg: error message (converted with str())
    :type msg: str
    :return: None
    """
    log_dir = os.path.join(os.getcwd(), "log")
    # exist_ok removes the gap between checking for the directory and creating it.
    os.makedirs(log_dir, exist_ok=True)
    # "with" closes the file even if the write fails.
    with open(os.path.join(log_dir, "error_log.txt"), "a") as log_file:
        log_file.write(str(datetime.datetime.now()) + " --> " + str(msg) + "\n")
164
|
|
|
|
165
|
|
|
def internet(host="8.8.8.8", port=53, timeout=3):
    """
    Check the internet connection by opening a TCP connection to host:port.

    The probe socket is closed again right away, and the timeout applies to
    this socket only (the process-wide default socket timeout is not touched).
    Any failure is written to the error log via error_log().

    :param host: the host to connect to
    :param port: the port to connect to
    :param timeout: connection timeout in seconds
    :type host: str
    :type port: int
    :type timeout: int
    :return bool: True if the connection succeeded, False otherwise
    """
    try:
        # The context manager releases the socket instead of leaking one per call.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except Exception as ex:
        # Best-effort check: every failure is logged and reported as "no connection".
        error_log(str(ex))
        return False
187
|
|
|
|
188
|
|
|
def create_random_sleep(index=1,min_time=1,max_time=3):
    '''
    Pick how long to sleep before the next request.

    :param index: 0 selects the fixed first-page delay; any other value selects a random delay
    :param min_time: minimum random sleep time
    :param max_time: maximum random sleep time
    :type index: int
    :type min_time: int
    :type max_time: int
    :return: 5 when index is 0, otherwise randint(min_time, max_time)
    '''
    first_page = index == 0
    time_sleep = 5 if first_page else randint(min_time, max_time)
    # Progress output is produced only when the module-level DEBUG flag is on.
    if DEBUG == True:
        tail = " sec for first search . . ." if first_page else " sec for next search . . ."
        print("Wait " + str(time_sleep) + tail)
        print_line(70, "*")
    return time_sleep
210
|
|
|
|
211
|
|
|
def print_line(number=30,char="-"):
    '''
    Print a separator line made of char repeated number times.

    :param number: number of repetitions of char in the line
    :param char: the text repeated to build the line
    :return: None
    '''
    print(char * number)
222
|
|
|
|
223
|
|
|
|
224
|
|
|
def follow(username):
    '''
    Build the follower and following lists of a user and save each to a log file.

    The lists are written, one name per line, to <username>_follower.log and
    <username>_following.log in the current working directory; progress
    messages are printed along the way.

    :param username: username
    :type username: str
    :return: (list_1,list_2) as tuple, i.e. (followers, following)
    '''
    print("Collecting Follower Information ...")
    print_line(70, "*")
    list_1 = follower_list_gen(username)
    # "with" closes the log file even if printing or writing fails.
    with open(username + "_follower.log", "w") as follower_file:
        print(str(len(list_1)) + " Followers --> " + username + "_follower.log")
        print_line(70, "*")
        follower_file.write("\n".join(list_1))
    print('Collecting Following Informnation ...')
    print_line(70, "*")
    list_2 = following_list_gen(username)
    with open(username + "_following.log", "w") as following_file:
        print(str(len(list_2)) + " Following --> " + username + "_following.log")
        print_line(70, "*")
        following_file.write("\n".join(list_2))
    return (list_1,list_2)
248
|
|
|
|
249
|
|
|
def dif(list_1,list_2,user=None):
    '''
    Write the two one-sided differences between the follower and following lists.

    <name>_dif1.log receives the names in list_2 that are not in list_1, and
    <name>_dif2.log the names in list_1 that are not in list_2, one per line
    and sorted so repeated runs produce the same file.

    :param list_1: follower list
    :param list_2: following list
    :param user: name used as the log-file prefix; when omitted the
        module-level ``username`` variable is used, as before
    :type list_1: list
    :type list_2: list
    :type user: str
    :return: None
    '''
    # Keep the old behaviour for two-argument callers: fall back to the global.
    name = username if user is None else user
    reports = (
        ("_dif1.log", " Following - Not Follower --> ", set(list_2) - set(list_1)),
        ("_dif2.log", " Follower - Not Following --> ", set(list_1) - set(list_2)),
    )
    for suffix, label, members in reports:
        dif_list = sorted(members)
        # "with" closes the log file even if printing or writing fails.
        with open(name + suffix, "w") as dif_file:
            print(str(len(dif_list)) + label + name + suffix)
            print_line(70, "*")
            dif_file.write("\n".join(dif_list))
270
|
|
|
if __name__=="__main__":
    # Script entry point: ask for a username, build the lists and the
    # difference files, then report how long the run took.
    time_1 = time.perf_counter()
    # NOTE: must stay a module-level name, dif() reads it as a global.
    username = input("Please Enter Your Github Username : ")
    (list_1, list_2) = follow(username)
    dif(list_1, list_2)
    time_2 = time.perf_counter()
    dif_time = str(time_2 - time_1)
    # time_convert() already spells out the units, so no extra " sec" is appended.
    print("Data Generated In " + time_convert(dif_time))
    print("Log Files Are Ready --> " + os.getcwd())
    gc.collect()
280
|
|
|
|
281
|
|
|
|
282
|
|
|
|
283
|
|
|
|
284
|
|
|
|
285
|
|
|
|
286
|
|
|
|
287
|
|
|
|
288
|
|
|
|
289
|
|
|
|
290
|
|
|
|
291
|
|
|
|
292
|
|
|
|