Completed
Push — master ( a0e972...a7ff6d )
by Sepand
01:07
created

repo_extract()   B

Complexity

Conditions 5

Size

Total Lines 22

Duplication

Lines 22
Ratio 100 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 5
c 1
b 0
f 1
dl 22
loc 22
rs 8.3411
1
import requests
2
import sys
3
import socket
4
import os
5
import datetime
6
from functools import reduce
7
import time
8
from random import randint
9
DEBUG=False
10
import gc
11
12
13
def zero_insert(input_string):
    '''
    Left-pad a one-character string with "0"; strings of any other length are returned unchanged.
    :param input_string: input digit(s) as string
    :type input_string:str
    :return: padded output as str
    '''
    is_single_char = len(input_string) == 1
    return "0" + input_string if is_single_char else input_string
23
24
def time_convert(input_string):
    '''
    Convert a duration given in seconds into a "DD days, HH hour, MM minutes, SS seconds" string.
    Fractions of a second are truncated; every field is padded through zero_insert.
    :param input_string: input time in seconds (anything float() accepts)
    :type input_string:str
    :return: converted time as string
    '''
    total_seconds = float(input_string)
    # Peel off minutes, hours and days in turn, keeping the remainder at each level.
    whole_minutes = total_seconds // 60
    seconds = int(total_seconds - whole_minutes * 60)
    whole_hours = whole_minutes // 60
    minutes = int(whole_minutes - whole_hours * 60)
    days = int(whole_hours // 24)
    hours = int(whole_hours - days * 24)
    pieces = (
        zero_insert(str(days)), " days, ",
        zero_insert(str(hours)), " hour, ",
        zero_insert(str(minutes)), " minutes, ",
        zero_insert(str(seconds)), " seconds",
    )
    return "".join(pieces)
39
40
def url_maker_following(Name,page_number):
    '''
    Build the url of one page of a user's github "following" tab
    :param Name: Username
    :param page_number: page number of following page
    :type Name:str
    :type page_number:int
    :return: github following url as string
    '''
    url_parts = ("https://github.com/", Name, "?page=", str(page_number), "&tab=following")
    return "".join(url_parts)
50
51
def url_maker_repo(Name,page_number):
    '''
    Build the url of one page of a user's github "repositories" tab
    :param Name: Username
    :param page_number: page number of repos page
    :type Name:str
    :type page_number:int
    :return: github repos url as string
    '''
    url_parts = ("https://github.com/", Name, "?page=", str(page_number), "&tab=repositories")
    return "".join(url_parts)
61
def url_maker_follower(Name,page_number):
    '''
    Build the url of one page of a user's github "followers" tab
    :param Name: username
    :param page_number: page number of follower page
    :type Name:str
    :type page_number:int
    :return: github follower url as string
    '''
    url_parts = ("https://github.com/", Name, "?page=", str(page_number), "&tab=followers")
    return "".join(url_parts)
71
def url_maker_star(Name,page_number):
    '''
    Build the url of one page of a user's github "stars" tab
    :param Name: username
    :param page_number: page number of stars
    :type Name:str
    :type page_number:int
    :return: github star url as string
    '''
    url_parts = ("https://github.com/", Name, "?page=", str(page_number), "&tab=stars")
    return "".join(url_parts)
81 View Code Duplication
def repo_extract(input_string,username):
    '''
    This function extract repo paths from raw_html

    Every occurrence of src="/<username> is located and the text from the
    leading "/" up to (not including) the next "graphs/" is collected.
    Candidates that are empty or contain "<svg" are discarded.  The first
    len(username)+1 characters of the page are never searched.
    :param input_string: raw input html
    :param username: github username whose repo links are searched
    :type input_string:str
    :type username:str
    :return: repo_list as list (empty when nothing matches or the inputs are not strings)
    '''
    repo_names = []
    try:
        marker = 'src="/' + username
        shift = len(username) + 1
        index = input_string.find(marker, shift)
        # Stop as soon as find() reports no further match; running the body
        # with index == -1 would slice almost the whole page into the result.
        while index != -1:
            end = input_string.find('graphs/', index)
            # index + 5 skips the five characters of 'src="' so the entry starts at "/".
            candidate = input_string[index + 5:end] if end != -1 else ""
            if candidate and "<svg" not in candidate:
                repo_names.append(candidate)
            index = input_string.find(marker, index + shift)
    except (AttributeError, TypeError):
        # Non-text input (e.g. None from a failed download): honour the
        # documented list return type instead of silently returning None.
        return []
    return repo_names
103
104 View Code Duplication
def star_extract(input_string):
    '''
    This function extract stared repo from raw_html

    Each '<a class="muted-link mr-3' anchor is located and the text between
    offset 34 from the anchor start and the following 'stargazers">' + newline
    is collected.  Candidates that are empty or contain "<svg" are discarded.
    The first 33 characters of the page are never searched.
    :param input_string: raw input html
    :type input_string:str
    :return: user_list as list (empty when nothing matches or the input is not a string)
    '''
    anchor = '<a class="muted-link mr-3'
    starred = []
    try:
        index = input_string.find(anchor, 33)
        # Stop on the first failed search; processing index == -1 would
        # append an unrelated slice of the page as a bogus repo.
        while index != -1:
            # NOTE(review): offsets 33/34 presumably skip the rest of the
            # opening tag up to and including the "/" of the href path;
            # verify against the current GitHub markup.
            end = input_string.find('stargazers">\n', index + 33)
            candidate = input_string[index + 34:end] if end != -1 else ""
            if candidate and "<svg" not in candidate:
                starred.append(candidate)
            index = input_string.find(anchor, index + 33)
    except (AttributeError, TypeError):
        # Non-text input: keep the documented list return type.
        return []
    return starred
125
126 View Code Duplication
def user_list_gen(input_string,follower_name):
    '''
    This function extract usernames from raw_html

    Every alt="@<name>" attribute is located and <name> (the text up to the
    closing double quote) is collected, except for follower_name itself and
    empty names.  The first 6 characters of the page are never searched.
    :param input_string: raw input html
    :param follower_name: username of the page owner, excluded from the result
    :type input_string:str
    :type follower_name:str
    :return: user_list as list (empty when nothing matches or the input is not a string)
    '''
    user_list = []
    try:
        index = input_string.find('alt="@', 6)
        # Stop on the first failed search, so no garbage entry is produced
        # and no trailing element has to be sliced off afterwards.
        while index != -1:
            start = index + 6
            end = input_string.find('"', start)
            user_name = input_string[start:end] if end != -1 else ""
            if user_name and user_name != follower_name:
                user_list.append(user_name)
            index = input_string.find('alt="@', start)
    except (AttributeError, TypeError):
        # Non-text input: keep the documented list return type.
        return []
    return user_list
148
def get_html(url):
    '''
    This function download the raw html of a page

    Sleeps for create_random_sleep() seconds, checks connectivity with
    internet() and then fetches the url with a fresh, cookie-less session.
    Exits the program (sys.exit) when the page text contains "Not Found".
    :param url: url
    :type url:str
    :return: html data as str, or None when there is no internet connection
    '''
    time.sleep(create_random_sleep())
    if not internet():
        print("Error In Internet")
        return None
    # The context manager closes the session even if the request raises.
    with requests.session() as new_session:
        new_session.cookies.clear()
        raw_data = new_session.get(url).text
    if "Not Found" in raw_data:
        print("Invalid Github User")
        sys.exit()
    return raw_data
169
170
171
def end_check(input_string):
    '''
    Tell whether the page contains the "reached the end" marker text
    :param input_string: raw html
    :type input_string:str
    :return: True if the marker is present, otherwise False
    '''
    marker_position = input_string.find("reached the end")
    return marker_position != -1
182
def follower_list_gen(follower_name):
    '''
    Walk the follower pages of a user, starting at page 1, until end_check() reports the end page
    :param follower_name: username
    :type follower_name:str
    :return: username follower list (None if an exception was logged instead)
    '''
    try:
        collected = []
        page_number = 1
        while True:
            page_html = get_html(url_maker_follower(follower_name, page_number))
            if end_check(page_html):
                break
            collected.extend(user_list_gen(page_html, follower_name))
            page_number += 1
        return collected
    except Exception:
        # page_number still names the page that was being processed.
        error_log("Error In Page "+str(page_number)+" Follower Page")
203
def repo_list(username):
    '''
    Walk the repositories pages of a user, starting at page 1, until a page yields no repos
    :param username: username
    :type username:str
    :return: repo list (None if an exception was logged instead)
    '''
    try:
        repos = []
        page_number = 1
        while True:
            page_html = get_html(url_maker_repo(username, page_number))
            page_repos = repo_extract(page_html, username)
            if not len(page_repos):
                break
            repos.extend(page_repos)
            page_number += 1
        return repos
    except Exception:
        # page_number still names the page that was being processed.
        error_log("Error In Page " + str(page_number) + " Repos Page")
224
def star_list(username):
    '''
    Walk the stars pages of a user, starting at page 1, until a page yields no starred repos
    :param username: username
    :type username:str
    :return: stared repo as list (None if an exception was logged instead)
    '''
    try:
        starred = []
        page_number = 1
        while True:
            page_html = get_html(url_maker_star(username, page_number))
            page_stars = star_extract(page_html)
            if not len(page_stars):
                break
            starred.extend(page_stars)
            page_number += 1
        return starred
    except Exception:
        # page_number still names the page that was being processed.
        error_log("Error In Page " + str(page_number) + " Stars Page")
245
246
def following_list_gen(follower_name):
    '''
    Walk the following pages of a user, starting at page 1, until end_check() reports the end page
    :param follower_name: username
    :type follower_name:str
    :return: username following list (None if an exception was logged instead)
    '''
    try:
        collected = []
        page_number = 1
        while True:
            page_html = get_html(url_maker_following(follower_name, page_number))
            if end_check(page_html):
                break
            collected.extend(user_list_gen(page_html, follower_name))
            page_number += 1
        return collected
    except Exception:
        # page_number still names the page that was being processed.
        error_log("Error In Page " + str(page_number) + " Following Page")
267
268
def error_log(msg):
    """
    Append a timestamped message to log/error_log.txt under the current working directory.

    The "log" directory is created when it does not exist yet.
    :param msg: error message
    :type msg:str
    :return: None
    """
    log_dir = os.path.join(os.getcwd(), "log")
    # exist_ok avoids the check-then-create race of listing the directory first.
    os.makedirs(log_dir, exist_ok=True)
    # "with" guarantees the handle is closed even if the write fails.
    with open(os.path.join(log_dir, "error_log.txt"), "a") as file:
        file.write(str(datetime.datetime.now()) + " --> " + str(msg) + "\n")
279
280
def internet(host="8.8.8.8", port=53, timeout=3):
    """
    Check Internet Connections.

    Opens a TCP connection to (host, port) and closes it again.  Note that
    the timeout is installed with socket.setdefaulttimeout(), so it also
    becomes the process-wide default for sockets created afterwards.
    :param  host: the host that check connection to
    :param  port: port that check connection with
    :param  timeout: connection timeout in seconds
    :type host:str
    :type port:int
    :type timeout:int
    :return bool: True if the connection succeeded, False on any error
    >>> internet() # if there is stable internet connection
    True
    >>> internet() # if there is no stable internet connection
    False
    """
    try:
        socket.setdefaulttimeout(timeout)
        # The context manager closes the probe socket on success and on
        # failure, so repeated checks do not leak file descriptors.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.connect((host, port))
        return True
    except Exception:
        return False
301
302
def create_random_sleep(index=1,min_time=1,max_time=3):
    '''
    Pick how long to sleep before a request: 5 for the first page, otherwise a random
    integer between min_time and max_time.  When DEBUG is on, the wait is announced.
    :param index: index to determine first page and messages (index = 0 is for first page)
    :param min_time: minimum time of sleep
    :param max_time: maximum time of sleep
    :type index:int
    :type min_time:int
    :type max_time:int
    :return: time of sleep as integer
    '''
    first_page = index == 0
    if first_page:
        time_sleep = 5
        notice = "Wait "+str(time_sleep)+" sec for first search . . ."
    else:
        time_sleep = randint(min_time, max_time)
        notice = "Wait "+str(time_sleep)+" sec for next search . . ."
    if DEBUG == True:
        print(notice)
        print_line(70,"*")
    return time_sleep
324
325
def print_line(number=30,char="-"):
    '''
    This function print a line made of char repeated number times
    :param number: number of items in each line
    :param char: each char of line
    :type number:int
    :type char:str
    :return: None
    '''
    # String repetition replaces the one-character-at-a-time concatenation loop.
    print(char * number)
336
337
338
def follow(username):
    '''
    This function collect follower, following, stars and repos lists and write each to a log file

    The files are <username>_follower.log, <username>_following.log,
    <username>_stars.log and <username>_repos.log in the current directory.
    Any exception is passed to error_log() and the function then returns None.
    :param username: username
    :type username:str
    :return: (list_1,list_2) as tuple, i.e. (followers, following)
    '''
    try:
        print("Collecting Follower Information ...")
        print_line(70, "*")
        list_1 = follower_list_gen(username)
        _write_list_log(username + "_follower.log", " Followers --> ", list_1)

        print('Collecting Following Informnation ...')
        print_line(70, "*")
        list_2 = following_list_gen(username)
        _write_list_log(username + "_following.log", " Following --> ", list_2)

        print('Collecting Stars Informnation ...')
        print_line(70, "*")
        stars = star_list(username)
        _write_list_log(username + "_stars.log", " Stars --> ", stars)

        print('Collecting Repos Informnation ...')
        print_line(70, "*")
        repos = repo_list(username)
        _write_list_log(username + "_repos.log", " Repos --> ", repos)
        return (list_1,list_2)
    except Exception as ex:
        error_log(str(ex))


def _write_list_log(file_name, label, items):
    '''
    Print the item count for one log file and write the items to it, one per line
    :param file_name: name of the log file to (over)write
    :param label: text printed between the count and the file name
    :param items: strings to write
    :type file_name:str
    :type label:str
    :type items:list
    :return: None
    '''
    # len() runs before the file is opened, so a missing list does not
    # leave an empty, never-closed file behind.
    print(str(len(items)) + label + file_name)
    print_line(70, "*")
    with open(file_name, "w") as file:
        file.write("\n".join(items))
382
383
def dif(list_1,list_2,user_name=None):
    '''
    This function generate dif files

    Writes <user>_NotFollower.log (accounts in list_2 but not in list_1) and
    <user>_NotFollowing.log (accounts in list_1 but not in list_2), sorted
    alphabetically.  Any exception is passed to error_log().
    :param list_1:follower list
    :param list_2: following list
    :param user_name: prefix for the file names; when omitted the module-level
                      username set by the __main__ block is used
    :type list_1:list
    :type list_2:list
    :type user_name:str
    :return: None
    '''
    try:
        if user_name is None:
            # Backward-compatible fallback to the global the script defines.
            user_name = username
        reports = (
            ("_NotFollower.log", " Following - Not Follower --> ", set(list_2) - set(list_1)),
            ("_NotFollowing.log", " Follower - Not Following --> ", set(list_1) - set(list_2)),
        )
        for suffix, label, names in reports:
            # Sorting makes the file content reproducible between runs.
            dif_list = sorted(names)
            print(str(len(dif_list)) + label + user_name + suffix)
            print_line(70, "*")
            with open(user_name + suffix, "w") as file:
                file.write("\n".join(dif_list))
    except Exception as ex:
        # Record the failure instead of discarding it silently.
        error_log(str(ex))
407
if __name__=="__main__":
    # Script entry point: ask for a username, generate all log files and
    # report how long the data collection took.
    try:
        username=input("Please Enter Your Github Username : ")
        # Start the clock after the prompt so the time the user spends
        # typing is not reported as generation time.
        time_1=time.perf_counter()
        (list_1,list_2)=follow(username)
        dif(list_1,list_2)
        time_2=time.perf_counter()
        dif_time=str(time_2-time_1)
        # time_convert() already ends with "seconds"; no extra unit is appended.
        print("Data Generated In "+time_convert(dif_time))
        print("Log Files Are Ready --> " + os.getcwd())
        gc.collect()
    except Exception as ex:
        error_log(str(ex))
420
421
422
423
424
425
426
427
428
429
430
431
432