1
|
|
|
import requests |
2
|
|
|
import sys |
3
|
|
|
import socket |
4
|
|
|
import os |
5
|
|
|
import datetime |
6
|
|
|
from functools import reduce |
7
|
|
|
import time |
8
|
|
|
from random import randint |
9
|
|
|
DEBUG=False |
10
|
|
|
import gc |
11
|
|
|
|
12
|
|
|
|
13
|
|
|
def zero_insert(input_string):
    """
    Left-pad a one-character string with "0"; return any other string unchanged.

    :param input_string: digit(s) as a string
    :type input_string: str
    :return: "0" + input when the input has exactly one character, otherwise the input itself
    """
    needs_padding = len(input_string) == 1
    return "0" + input_string if needs_padding else input_string
23
|
|
|
|
24
|
|
|
def time_convert(input_string):
    """
    Convert a duration given in seconds to a "DD days, HH hour, MM minutes, SS seconds" string.

    :param input_string: duration in seconds (anything accepted by float())
    :type input_string: str
    :return: formatted duration, each field zero-padded to at least two digits
    """
    total_seconds = float(input_string)
    whole_minutes = total_seconds // 60
    seconds = int(total_seconds - whole_minutes * 60)
    whole_hours = whole_minutes // 60
    minutes = int(whole_minutes - whole_hours * 60)
    days = int(whole_hours // 24)
    hours = int(whole_hours - days * 24)
    # "{:02d}" pads single digits with a leading zero and leaves longer numbers alone.
    template = "{:02d} days, {:02d} hour, {:02d} minutes, {:02d} seconds"
    return template.format(days, hours, minutes, seconds)
39
|
|
|
|
40
|
|
|
def url_maker_following(Name,page_number):
    """
    Build the URL of one page of a user's GitHub "following" tab.

    :param Name: username
    :type Name: str
    :param page_number: page number of the following tab
    :type page_number: int
    :return: github following url as string
    """
    pieces = ["https://github.com/", Name, "?page=", str(page_number), "&tab=following"]
    return "".join(pieces)
50
|
|
|
|
51
|
|
|
def url_maker_repo(Name,page_number):
    """
    Build the URL of one page of a user's GitHub "repositories" tab.

    :param Name: username
    :type Name: str
    :param page_number: page number of the repositories tab
    :type page_number: int
    :return: github repos url as string
    """
    pieces = ["https://github.com/", Name, "?page=", str(page_number), "&tab=repositories"]
    return "".join(pieces)
61
|
|
|
def url_maker_follower(Name,page_number):
    """
    Build the URL of one page of a user's GitHub "followers" tab.

    :param Name: username
    :type Name: str
    :param page_number: page number of the followers tab
    :type page_number: int
    :return: github follower url as string
    """
    pieces = ["https://github.com/", Name, "?page=", str(page_number), "&tab=followers"]
    return "".join(pieces)
71
|
|
|
def url_maker_star(Name,page_number):
    """
    Build the URL of one page of a user's GitHub "stars" tab.

    :param Name: username
    :type Name: str
    :param page_number: page number of the stars tab
    :type page_number: int
    :return: github star url as string
    """
    pieces = ["https://github.com/", Name, "?page=", str(page_number), "&tab=stars"]
    return "".join(pieces)
81
|
|
View Code Duplication |
def repo_extract(input_string,username):
    """
    Extract repository paths from the raw HTML of a user's repositories page.

    Every occurrence of ``src="/<username>`` is located and the text from the
    leading "/" up to (not including) the next ``graphs/`` is taken as one
    repository path, e.g. ``/user/repo/``. Candidates that are empty or that
    contain ``<svg`` are discarded.

    :param input_string: raw input html
    :type input_string: str
    :param username: owner of the repositories
    :type username: str
    :return: list of repository paths, or None when either argument is not a str
    """
    # Non-string input (e.g. a failed download handed over as None) is reported
    # with None instead of being hidden behind a blanket ``except``.
    if not isinstance(input_string, str) or not isinstance(username, str):
        return None
    marker = 'src="/' + username
    step = len(username) + 1
    repos = []
    # Search from the very start and leave the loop as soon as the marker is
    # missing, so no slice is ever built from a -1 "not found" index.
    position = input_string.find(marker)
    while position != -1:
        end = input_string.find('graphs/', position)
        if end != -1:
            # +5 skips the literal ``src="`` so the path keeps its leading "/".
            candidate = input_string[position + 5:end]
            if candidate and "<svg" not in candidate:
                repos.append(candidate)
        position = input_string.find(marker, position + step)
    return repos
103
|
|
|
|
104
|
|
View Code Duplication |
def star_extract(input_string):
    """
    Extract starred repositories from the raw HTML of a user's stars page.

    Every ``<a class="muted-link mr-3`` anchor is located; the href value is
    assumed to start 33 characters after the anchor start (the length of
    ``<a class="muted-link mr-3" href="``). The text after the leading "/" up
    to (not including) ``stargazers">`` followed by a newline is taken as one
    entry, e.g. ``owner/repo/``. Candidates that are empty or contain
    ``<svg`` are discarded.

    :param input_string: raw input html
    :type input_string: str
    :return: list of starred repositories, or None when input_string is not a str
    """
    # Non-string input (e.g. a failed download handed over as None) is reported
    # with None instead of being hidden behind a blanket ``except``.
    if not isinstance(input_string, str):
        return None
    marker = '<a class="muted-link mr-3'
    terminator = 'stargazers">\n'
    href_offset = 33
    starred = []
    # Search from the very start and stop as soon as the marker is missing, so
    # no slice is ever built from a -1 "not found" index.
    position = input_string.find(marker)
    while position != -1:
        href_start = position + href_offset
        end = input_string.find(terminator, href_start)
        if end != -1:
            # +1 drops the leading "/" of the href value.
            candidate = input_string[href_start + 1:end]
            if candidate and "<svg" not in candidate:
                starred.append(candidate)
        position = input_string.find(marker, href_start)
    return starred
125
|
|
|
|
126
|
|
View Code Duplication |
def user_list_gen(input_string,follower_name):
    """
    Extract usernames from the raw HTML of a follower/following page.

    Every ``alt="@<name>"`` attribute is read; names equal to
    ``follower_name`` (the page owner) and empty names are skipped.

    :param input_string: raw input html
    :type input_string: str
    :param follower_name: username of the page owner, excluded from the result
    :type follower_name: str
    :return: list of usernames in page order, or None when input_string is not a str
    """
    # Non-string input (e.g. a failed download handed over as None) is reported
    # with None instead of being hidden behind a blanket ``except``.
    if not isinstance(input_string, str):
        return None
    marker = 'alt="@'
    names = []
    # Leave the loop as soon as the marker is missing. Nothing bogus is ever
    # appended, so no trailing element has to be dropped afterwards (dropping
    # it unconditionally could discard a real user).
    position = input_string.find(marker)
    while position != -1:
        name_start = position + len(marker)
        name_end = input_string.find('"', name_start)
        if name_end != -1:
            name = input_string[name_start:name_end]
            if name and name != follower_name:
                names.append(name)
        position = input_string.find(marker, name_start)
    return names
148
|
|
|
def get_html(url):
    """
    Download a page and return its HTML text.

    Sleeps for the delay returned by create_random_sleep() before the request.
    Terminates the program via sys.exit() when the server answers HTTP 404
    (unknown GitHub user).

    :param url: url
    :type url: str
    :return: html data as str, or None when internet() reports no connection
    """
    time.sleep(create_random_sleep())
    if not internet():
        print("Error In Internet")
        return None
    # The context manager closes the session even when the request raises.
    with requests.session() as new_session:
        new_session.cookies.clear()
        response = new_session.get(url)
        raw_data = response.text
    # Rely on the status code rather than searching the body for "Not Found":
    # a valid page may legitimately contain those words.
    if response.status_code == 404:
        print("Invalid Github User")
        sys.exit()
    return raw_data
169
|
|
|
|
170
|
|
|
|
171
|
|
|
def end_check(input_string):
    """
    Tell whether a page is past the last page of a listing.

    :param input_string: raw html
    :type input_string: str
    :return: True when the html contains the text "reached the end", otherwise False
    """
    marker_position = input_string.find("reached the end")
    return marker_position != -1
182
|
|
View Code Duplication |
def follower_list_gen(follower_name,page_number=0,counter=0):
    """
    Collect the followers of a user by walking the paginated followers tab.

    Crawling starts at page ``page_number + 1`` and stops at the first page
    that end_check() flags as the end or that yields no usernames. A page that
    fails is logged and fetched again; names gathered so far are kept. After
    more than four failures in total the program exits via sys.exit().

    :param follower_name: username
    :type follower_name: str
    :param page_number: last page already processed
    :type page_number: int
    :param counter: number of failures already seen
    :type counter: int
    :return: username follower list
    """
    follower_list = []
    while True:
        page_number += 1
        try:
            follower_html = get_html(url_maker_follower(follower_name, page_number))
            if end_check(follower_html):
                break
            page_users = user_list_gen(follower_html, follower_name)
            if page_users is None:
                raise ValueError("no usernames could be parsed")
        except Exception:
            if counter > 3:
                sys.exit()
            error_log("Error In Page "+str(page_number)+" Follower Page")
            counter += 1
            # Step back so the next iteration retries the page that failed.
            page_number -= 1
            continue
        if not page_users:
            # An empty page means the listing is exhausted; this also prevents an
            # endless crawl if the end-of-list text is ever absent.
            break
        follower_list.extend(page_users)
    return follower_list
205
|
|
|
def repo_list(username,page_number=0,counter=0):
    """
    Collect the repositories of a user by walking the paginated repositories tab.

    Crawling starts at page ``page_number + 1`` and stops at the first page
    that yields no repositories. A page that fails is logged and fetched
    again; entries gathered so far are kept. After more than four failures in
    total the program exits via sys.exit().

    :param username: username
    :type username: str
    :param page_number: last page already processed
    :type page_number: int
    :param counter: number of failures already seen
    :type counter: int
    :return: repositories as list
    """
    repo_list_temp = []
    while True:
        page_number += 1
        try:
            repo_html = get_html(url_maker_repo(username, page_number))
            page_repos = repo_extract(repo_html, username)
            if page_repos is None:
                raise ValueError("no repositories could be parsed")
        except Exception:
            if counter > 3:
                sys.exit()
            error_log("Error In Page " + str(page_number) + " Repos Page")
            counter += 1
            # Step back so the next iteration retries the page that failed.
            page_number -= 1
            continue
        if len(page_repos) == 0:
            break
        repo_list_temp.extend(page_repos)
    return repo_list_temp
228
|
|
|
def star_list(username,page_number=0,counter=0):
    """
    Collect the starred repositories of a user by walking the paginated stars tab.

    Crawling starts at page ``page_number + 1`` and stops at the first page
    that yields no entries. A page that fails is logged and fetched again;
    entries gathered so far are kept. After more than four failures in total
    the program exits via sys.exit().

    :param username: username
    :type username: str
    :param page_number: last page already processed
    :type page_number: int
    :param counter: number of failures already seen
    :type counter: int
    :return: stared repo as list
    """
    star_list_temp = []
    while True:
        page_number += 1
        try:
            star_html = get_html(url_maker_star(username, page_number))
            page_stars = star_extract(star_html)
            if page_stars is None:
                raise ValueError("no starred repositories could be parsed")
        except Exception:
            if counter > 3:
                sys.exit()
            error_log("Error In Page " + str(page_number) + " Stars Page")
            counter += 1
            # Step back so the next iteration retries the page that failed.
            page_number -= 1
            continue
        if len(page_stars) == 0:
            break
        star_list_temp.extend(page_stars)
    return star_list_temp
251
|
|
|
|
252
|
|
View Code Duplication |
def following_list_gen(follower_name,page_number=0,counter=0):
    """
    Collect the accounts a user follows by walking the paginated following tab.

    Crawling starts at page ``page_number + 1`` and stops at the first page
    that end_check() flags as the end or that yields no usernames. A page that
    fails is logged and fetched again; names gathered so far are kept. After
    more than four failures in total the program exits via sys.exit().

    :param follower_name: username
    :type follower_name: str
    :param page_number: last page already processed
    :type page_number: int
    :param counter: number of failures already seen
    :type counter: int
    :return: username following list
    """
    following_list = []
    while True:
        page_number += 1
        try:
            following_html = get_html(url_maker_following(follower_name, page_number))
            if end_check(following_html):
                break
            page_users = user_list_gen(following_html, follower_name)
            if page_users is None:
                raise ValueError("no usernames could be parsed")
        except Exception:
            if counter > 3:
                sys.exit()
            error_log("Error In Page " + str(page_number) + " Following Page")
            counter += 1
            # Step back so the next iteration retries the page that failed.
            page_number -= 1
            continue
        if not page_users:
            # An empty page means the listing is exhausted; this also prevents an
            # endless crawl if the end-of-list text is ever absent.
            break
        following_list.extend(page_users)
    return following_list
275
|
|
|
|
276
|
|
|
def error_log(msg):
    """
    Append a timestamped message to log/error_log.txt under the current working directory.

    The "log" directory is created when it does not exist yet.

    :param msg: error message
    :type msg: str
    """
    log_dir = os.path.join(os.getcwd(), "log")
    # exist_ok avoids the check-then-create race of listing the directory first.
    os.makedirs(log_dir, exist_ok=True)
    # The context manager closes the file even if the write fails.
    with open(os.path.join(log_dir, "error_log.txt"), "a") as log_file:
        log_file.write(str(datetime.datetime.now()) + " --> " + str(msg) + "\n")
287
|
|
|
|
288
|
|
|
def internet(host="8.8.8.8", port=53, timeout=3):
    """
    Check the internet connection by opening a TCP connection to host:port.

    :param host: the host that check connection to
    :type host: str
    :param port: port that check connection with
    :type port: int
    :param timeout: seconds to wait for the connection
    :type timeout: int
    :return bool: True if the connection succeeded, False on any failure

    Example: ``internet()`` returns True with a working connection and
    False without one.
    """
    try:
        # Kept from the original implementation: this sets the process-wide
        # default timeout for sockets created afterwards.
        socket.setdefaulttimeout(timeout)
        # The context manager closes the probe socket; previously every call
        # leaked one open socket.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.connect((host, port))
        return True
    except Exception:
        # Any failure (unreachable host, bad argument, timeout) means "offline".
        return False
309
|
|
|
|
310
|
|
|
def create_random_sleep(index=1,min_time=1,max_time=3):
    """
    Pick how long to sleep before the next request.

    :param index: 0 selects the fixed first-page delay, any other value a random delay
    :type index: int
    :param min_time: minimum time of sleep
    :type min_time: int
    :param max_time: maximum time of sleep
    :type max_time: int
    :return: 5 when index is 0, otherwise a random integer between min_time and max_time (inclusive)
    """
    is_first_page = index == 0
    time_sleep = 5 if is_first_page else randint(min_time, max_time)
    if DEBUG == True:
        if is_first_page:
            suffix = " sec for first search . . ."
        else:
            suffix = " sec for next search . . ."
        print("Wait " + str(time_sleep) + suffix)
        print_line(70, "*")
    return time_sleep
332
|
|
|
|
333
|
|
|
def print_line(number=30,char="-"):
    """
    Print a line made of ``char`` repeated ``number`` times.

    :param number: number of items in each line
    :param char: each char of line
    :return: None
    """
    repeated = [char] * number
    print("".join(repeated))
344
|
|
|
|
345
|
|
|
|
346
|
|
|
def follow(username):
    """
    Collect followers, following, stars and repos of a user and write each list to a log file.

    Files written in the current directory: <username>_follower.log,
    <username>_following.log, <username>_stars.log and <username>_repos.log.

    :param username: username
    :type username: str
    :return: (list_1,list_2) as tuple (followers, following), or None when any
             step failed (the error is written with error_log)
    """
    try:
        print("Collecting Follower Information ...")
        print_line(70, "*")
        list_1 = follower_list_gen(username)
        _write_listing(username + "_follower.log", list_1, " Followers --> ")

        print('Collecting Following Informnation ...')
        print_line(70, "*")
        list_2 = following_list_gen(username)
        _write_listing(username + "_following.log", list_2, " Following --> ")

        print('Collecting Stars Informnation ...')
        print_line(70, "*")
        stars = star_list(username)
        _write_listing(username + "_stars.log", stars, " Stars --> ")

        print('Collecting Repos Informnation ...')
        print_line(70, "*")
        repos = repo_list(username)
        _write_listing(username + "_repos.log", repos, " Repos --> ")
        return (list_1, list_2)
    except Exception as ex:
        error_log(str(ex))
        return None


def _write_listing(file_name, entries, label):
    """Print how many entries were collected and write them, one per line, to file_name."""
    # Build the report first so a missing list fails before a file is created.
    report = str(len(entries)) + label + file_name
    # The context manager closes the file even if printing or writing fails.
    with open(file_name, "w") as log_file:
        print(report)
        print_line(70, "*")
        log_file.write("\n".join(entries))
390
|
|
|
|
391
|
|
|
def dif(list_1,list_2,name=None):
    """
    Write the differences between the follower and following lists to log files.

    <name>_NotFollower.log receives accounts that are followed but do not
    follow back; <name>_NotFollowing.log receives followers that are not
    followed. Entries are sorted so the output is reproducible.

    :param list_1: follower list
    :type list_1: list
    :param list_2: following list
    :type list_2: list
    :param name: username used as file-name prefix; when omitted the
                 module-level ``username`` set by the script entry point is used
    :type name: str
    :return: None
    """
    try:
        if name is None:
            # Backward-compatible fallback for existing two-argument callers.
            name = username
        followers = set(list_1)
        following = set(list_2)

        not_follower = sorted(following - followers)
        file_name = name + "_NotFollower.log"
        # Context managers close the files even if printing or writing fails.
        with open(file_name, "w") as log_file:
            print(str(len(not_follower)) + " Following - Not Follower --> " + file_name)
            print_line(70, "*")
            log_file.write("\n".join(not_follower))

        not_following = sorted(followers - following)
        file_name = name + "_NotFollowing.log"
        with open(file_name, "w") as log_file:
            print(str(len(not_following)) + " Follower - Not Following --> " + file_name)
            print_line(70, "*")
            log_file.write("\n".join(not_following))
    except Exception as ex:
        # Still best-effort, but the failure is recorded instead of vanishing.
        error_log(str(ex))
415
|
|
|
if __name__=="__main__":
    # Script entry point: ask for a username, collect the data, write the
    # difference files and report the elapsed time. Errors go to the error log.
    try:
        # `username` stays a module-level name because dif() reads it.
        username=input("Please Enter Your Github Username : ")
        # Start timing after the prompt so typing time is not counted.
        time_1=time.perf_counter()
        result=follow(username)
        if result is None:
            # follow() has already logged the cause; fail with a clear message
            # instead of an opaque tuple-unpacking error.
            raise RuntimeError("Collecting data for " + username + " failed")
        (list_1,list_2)=result
        dif(list_1,list_2)
        time_2=time.perf_counter()
        dif_time=str(time_2-time_1)
        # time_convert() already ends with "seconds"; no extra unit is appended.
        print("Data Generated In "+time_convert(dif_time))
        print("Log Files Are Ready --> " + os.getcwd())
        gc.collect()
    except Exception as ex:
        error_log(str(ex))
428
|
|
|
|
429
|
|
|
|
430
|
|
|
|
431
|
|
|
|
432
|
|
|
|
433
|
|
|
|
434
|
|
|
|
435
|
|
|
|
436
|
|
|
|
437
|
|
|
|
438
|
|
|
|
439
|
|
|
|
440
|
|
|
|