Passed
Push — master ( 9faf20...545d25 )
by dai
01:46
created

school_api.client.api.utils.schedule_parse.ScheduleParse._merger_section_schedule()   C

Complexity

Conditions 10

Size

Total Lines 25
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
eloc 14
nop 1
dl 0
loc 25
rs 5.9999
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like school_api.client.api.utils.schedule_parse.ScheduleParse._merger_section_schedule() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
'''
3
    @Time       : 2016 - 2018
4
    @Author     : dairoot
5
    @Email      : [email protected]
6
    @description: 课表解析
7
'''
8
from __future__ import absolute_import, unicode_literals
9
10
import re
11
import six
12
from bs4 import BeautifulSoup
13
14
15
class BaseScheduleParse(object):
    """Parse a timetable HTML page into per-day, per-section course lists.

    After construction:

    * ``schedule_year`` / ``schedule_term`` hold the text of the first two
      ``<option selected="selected">`` elements of the page (empty strings
      when the page does not contain both).
    * ``schedule_list`` is a list of 7 days; each day is a list of sections;
      each section is a list of course entries shaped
      ``[name, weeks_text, teacher, place, section_count, weeks_arr]``.

    :param html: HTML text of the timetable page.
    :param time_list: lookup used as ``time_list[section_count][section]``
        to obtain the "time" value of a course.
    :param schedule_type: ``1`` reads the table with id ``Table6``, any other
        value reads ``Table1``; a falsy value also means the week text is
        taken from inside ``{...}`` (see ``_get_weeks_text``).
    """
    # NOTE: the attribute name is spelled ``COlOR`` (lowercase "l"); it is
    # kept unchanged so existing references keep working.
    COlOR = ['green', 'blue', 'purple', 'red', 'yellow']

    def __init__(self, html, time_list, schedule_type):
        self.schedule_year = ''
        self.schedule_term = ''
        self.time_list = time_list
        self.schedule_type = schedule_type
        self.schedule_list = [[] for _ in range(7)]
        self.schedule_dict = [[] for _ in range(7)]

        soup = BeautifulSoup(html, "html.parser")
        option_args = soup.find_all("option", {"selected": "selected"})
        # Both the year and the term option are required; without them the
        # page is treated like a page without a timetable (empty result)
        # instead of raising IndexError half-way through.
        if len(option_args) < 2:
            return
        self.schedule_year = option_args[0].text
        self.schedule_term = option_args[1].text

        table_id = "Table6" if schedule_type == 1 else "Table1"
        table = soup.find("table", {"id": table_id})
        # ``soup.find`` returns None when the table is absent.
        if table is not None:
            self.html_parse(table.find_all('tr'))

    def html_parse(self, trs):
        """Fill ``self.schedule_list`` from the ``<tr>`` rows of the table.

        Rows 2, 4, 6, 8 and 10 of ``trs`` are read (per the original
        author's note, row ``i`` corresponds to period ``i - 1`` and the
        even periods are not fetched).  For every day cell one list is
        appended to ``self.schedule_list[day]``; it contains one
        ``[name, weeks_text, teacher, place, rowspan, weeks_arr]`` entry per
        accepted course fragment of the cell, where ``rowspan`` is 2 when
        the cell markup contains ``rowspan="2"`` and 1 otherwise.

        :param trs: sequence of ``<tr>`` tags of the timetable table.
        :return: None
        """
        # Fragments whose "name" matches this pattern are not courses and
        # are skipped.
        skip_pattern = re.compile(r'^\([\u2E80-\u9FFF]{1,3}\d+\)')
        day_count = len(self.schedule_list)

        for i in range(2, 12, 2):
            if i >= len(trs):
                # Table shorter than expected: keep what was parsed so far.
                break
            tds = trs[i].find_all("td")
            # Drop the leading label cells (original note: e.g. "morning",
            # "first period"); rows 2, 6 and 10 carry one extra label cell.
            tds = tds[2:] if i in (2, 6, 10) else tds[1:]

            # One cell per day (Monday..Sunday); extra cells are ignored so
            # they cannot index past ``schedule_list``.
            for day, day_c in enumerate(tds[:day_count]):
                row_arr = []
                # Blank cells hold only whitespace (a plain or non-breaking
                # space); they yield an empty section.
                if day_c.text.strip():
                    td_str = day_c.__unicode__()
                    rowspan = 2 if 'rowspan="2"' in td_str else 1
                    # Strip the opening <td ...> tag and the closing </td>.
                    td_main = re.sub(r'<td align="Center".*?>', '', td_str)[:-5]

                    for text in td_main.split('<br/><br/>'):
                        course_arr = self._get_td_course_info(text)
                        if course_arr[0] and not skip_pattern.match(course_arr[0]):
                            course_arr[1] = self._get_weeks_text(course_arr[1])
                            weeks_arr = self._get_weeks_arr(course_arr[1])
                            row_arr.append(course_arr + [rowspan, weeks_arr])
                self.schedule_list[day].append(row_arr)

    def get_schedule_dict(self):
        """Return the timetable as dictionaries.

        The result is rebuilt from ``self.schedule_list`` on every call (and
        stored in ``self.schedule_dict``), so calling the method repeatedly
        returns the same data instead of accumulating duplicates.

        :return: ``{'schedule_term': ..., 'schedule_year': ...,
            'schedule': [day][section][course_dict]}``
        """
        palette = self.COlOR
        schedule_dict = []
        for day, day_schedule in enumerate(self.schedule_list):
            day_dict = []
            for section, section_schedule in enumerate(day_schedule):
                color = palette[(day * 3 + section + 1) % len(palette)]
                # Empty entries are placeholders left by the merge step.
                day_dict.append([
                    {
                        "color": color,
                        "name": schedule[0],
                        "weeks_text": schedule[1],
                        "teacher": schedule[2],
                        "place": schedule[3],
                        "section": schedule[4],
                        "weeks_arr": schedule[5],
                        "time": self.time_list[schedule[4]][section],
                    }
                    for schedule in section_schedule if schedule
                ])
            schedule_dict.append(day_dict)
        self.schedule_dict = schedule_dict

        return {
            'schedule_term': self.schedule_term,
            'schedule_year': self.schedule_year,
            'schedule': self.schedule_dict,
        }

    def _get_weeks_text(self, class_time):
        """Extract the week description from a course's time text.

        * falsy ``schedule_type``: the text inside ``{...}``, or ``''`` when
          there are no braces.
        * otherwise, if the text contains ``2节/``: the text itself, with
          ``(1-18)`` appended when it has no ``(``.
        * otherwise: everything before the first ``(``.

        :param class_time: raw time text of one course.
        :return: week description text.
        """
        if not self.schedule_type:
            braces = re.findall(r"{(.*)}", class_time)
            return braces[0] if braces else ''

        # Known shapes (from the original notes):
        #   2节/周
        #   2节/单周(7-7)
        #   1-10,13-18(1,2)
        if '2节/' in class_time:
            return class_time if '(' in class_time else class_time + '(1-18)'
        return class_time.split('(')[0]

    @staticmethod
    def _get_weeks_arr(weeks_text):
        """Convert a week description into a list of week numbers.

        The text is split on ``,``; a part containing ``a-b`` contributes
        the weeks ``a..b`` and any other part contributes ``int(part)``
        (parts that are not integers are ignored).  When the text contains
        ``单`` only odd weeks of a range are kept, when it contains ``双``
        only even weeks are kept, regardless of the parity of ``a``.

        :param weeks_text: week description text.
        :return: list of week numbers.
        """
        parities = set()
        if '单' in weeks_text:
            parities.add(1)
        if '双' in weeks_text:
            parities.add(0)

        weeks_arr = []
        for split_text in weeks_text.split(','):
            span = re.search(r'(\d{1,2})-(\d{1,2})', split_text)
            if span:
                first, last = int(span.group(1)), int(span.group(2))
                weeks_arr.extend(
                    week for week in range(first, last + 1)
                    if not parities or week % 2 in parities
                )
                continue
            try:
                weeks_arr.append(int(split_text))
            except ValueError:
                # Not a week number (e.g. an empty description): skip it.
                continue
        return weeks_arr

    @staticmethod
    def _get_td_course_info(text):
        """Split one course fragment of a cell into its fields.

        ``<font>`` tags and a leading ``<br/>`` are removed, the text is
        split on ``<br/>``, the course-category labels are dropped and the
        first four remaining fields are returned, padded with ``''`` so the
        result always has exactly four items (e.g. when the place is
        missing).

        :param text: HTML fragment describing one course.
        :return: list of four strings; by position the callers treat them
            as name, time text, teacher and place.
        """
        text = re.sub(r'<[/]{0,1}font[^>]*?>', '', text)
        text = re.sub(r'^<br/>', '', text)

        if six.PY2:
            # The markup is serialised differently under Python 2.
            text = re.sub(r'</br></br></br>$', '', text)
            text = text.replace('<br>', '<br/>')

        info_arr = [
            field for field in text.split('<br/>')
            if field not in ('选修', '公选', '必修')
        ][:4]
        # Always hand back four fields so callers can index them safely.
        info_arr += [''] * (4 - len(info_arr))
        return info_arr
154
155
156
class ScheduleParse(BaseScheduleParse):
    """Timetable parser that merges duplicated course entries after parsing."""

    def __init__(self, html, time_list, schedule_type=0):
        # Explicit base call (not ``super``) as in the original code.
        BaseScheduleParse.__init__(self, html, time_list, schedule_type)
        self.merger_same_schedule()

    def merger_same_schedule(self):
        """Run the per-day merge on every day of ``self.schedule_list``.

        :return: None (``self.schedule_list`` is modified in place)
        """
        for day_schedule in self.schedule_list:
            self._merger_day_schedule(day_schedule)

    def _merger_day_schedule(self, day_schedule):
        """Merge equal courses of one day, in place.

        First every section is de-duplicated with
        ``_merger_section_schedule``.  Then, for each pair of neighbouring
        sections, entries whose first four fields (name, weeks text,
        teacher, place) are equal are merged: the section count (index 4) of
        the entry in the later section is added to the entry in the earlier
        section and the later entry is replaced by ``[]``.

        Example: ``["English", "2节/双周(14-14)", "name", "1-301", 2, [14]]``
        present in two neighbouring sections becomes one entry with section
        count ``4`` in the first section and ``[]`` in the second.

        :param day_schedule: list of sections of one day.
        """
        for cell in day_schedule:
            self._merger_section_schedule(cell)

        # Walk neighbouring sections pairwise; both names refer to the inner
        # lists of ``day_schedule`` so the edits below happen in place.
        for upper, lower in zip(day_schedule, day_schedule[1:]):
            for upper_pos, upper_entry in enumerate(upper):
                for lower_pos, lower_entry in enumerate(lower):
                    if not (upper_entry and lower_entry):
                        continue
                    # name, weeks text, teacher, place
                    if all(upper_entry[k] == lower_entry[k] for k in range(4)):
                        upper[upper_pos][4] += lower[lower_pos][4]
                        lower[lower_pos] = []

    @staticmethod
    def _merger_section_schedule(section_schedule):
        """Merge entries of one section that carry the same course name.

        For each pair (earlier, later) of non-empty entries with equal name:

        * equal weeks text: the earlier entry is replaced by ``[]``;
        * different weeks text but equal place: the earlier entry's weeks
          are appended to the later entry (``weeks_arr`` extended, weeks
          text joined with ``,`` after the later entry's own text) and the
          earlier entry is replaced by ``[]``.

        Example: ``["English", "2节/单周(7-7)", "name", "1-301", 2, [7]]``
        followed by ``["English", "2节/双周(8-8)", "name", "1-301", 2, [8]]``
        leaves ``[]`` and
        ``["English", "2节/双周(8-8),2节/单周(7-7)", "name", "1-301", 2, [8, 7]]``.

        :param section_schedule: list of course entries of one section;
            modified in place.
        """
        total = len(section_schedule)
        for i in range(total):
            for j in range(i + 1, total):
                earlier, later = section_schedule[i], section_schedule[j]
                if not earlier or not later:
                    continue
                if earlier[0] != later[0]:
                    continue

                if earlier[1] == later[1]:
                    # Same name and same weeks text: drop the earlier one.
                    section_schedule[i] = []
                    break
                if earlier[3] == later[3]:
                    # Same name and place, different weeks: fold the earlier
                    # entry into the later one.
                    later[5] += earlier[5]
                    later[1] += ',' + earlier[1]
                    section_schedule[i] = []
                    break
225