school_api.client.api.utils.schedule_parse.ScheduleParse._merger_section_schedule() - Code Metrics - Inspection of "修复课表课程合并问题" - dairoot/school-api - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 9faf20...545d25 )

by dai

created 2018-11-05 13:41 UTC

school_api.client.api.utils.schedule_parse.ScheduleParse._merger_section_schedule() C

↳ Parent: Project

Complexity

Conditions

Size

Total Lines	25
Code Lines	14

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	10
eloc	14
nop	1
dl	0
loc	25
rs	5.9999
c	0
b	0
f	0

How to fix Complexity

# -*- coding: utf-8 -*-
'''
    @Time       : 2016 - 2018
    @Author     : dairoot
    @Email      : [email protected]
    @description: 课表解析
'''
from __future__ import absolute_import, unicode_literals

import re
import six
from bs4 import BeautifulSoup


class BaseScheduleParse():

    ''' Timetable page parser: turns the timetable HTML into per-day course lists. '''
    # NOTE: the misspelling (lowercase "l") is kept on purpose, the name is part
    # of the class's visible interface.
    COlOR = ['green', 'blue', 'purple', 'red', 'yellow']

    def __init__(self, html, time_list, schedule_type):
        """
        Parse the timetable page as soon as the object is created.

        :param html: HTML text of the timetable page
        :param time_list: lookup used by get_schedule_dict as
            time_list[section_count][section_index]
        :param schedule_type: 1 reads the table with id "Table6", anything
            else reads "Table1"; a falsy value also selects the
            brace-delimited weeks format in _get_weeks_text
        """
        self.schedule_year = ''
        self.schedule_term = ''
        self.time_list = time_list
        self.schedule_type = schedule_type
        # One list per day (7 days); html_parse appends one entry per section.
        self.schedule_list = [[], [], [], [], [], [], []]
        self.schedule_dict = [[], [], [], [], [], [], []]

        soup = BeautifulSoup(html, "html.parser")
        option_args = soup.find_all("option", {"selected": "selected"})
        # Without any selected <option> nothing is parsed and the schedule
        # stays empty.
        if option_args:
            # The first two selected options are read as year and term.
            self.schedule_year = option_args[0].text
            self.schedule_term = option_args[1].text
            table = soup.find("table", {"id": "Table6"}) if \
                schedule_type == 1 else soup.find("table", {"id": "Table1"})
            trs = table.find_all('tr')
            self.html_parse(trs)

    def html_parse(self, trs):
        """
        Fill self.schedule_list from the rows of the timetable table.

        Every parsed course is a list
        [name, weeks_text, teacher, place, section_count, weeks_arr];
        the courses found in one cell are appended, as one list, to
        self.schedule_list[day].

        :param trs: the <tr> elements of the timetable table
        :return: None
        """
        # Course "names" shaped like "(<1-3 CJK chars><digits>)" are skipped.
        pattern = r'^\([\u2E80-\u9FFF]{1,3}\d+\)'
        # Only rows 2, 4, 6, 8 and 10 are read: one row per pair of sections.
        for i in range(2, 12, 2):
            tds = trs[i].find_all("td")
            # Drop the leading label cells (e.g. "morning", "section 1");
            # rows 2, 6 and 10 carry one extra label cell.
            if i in [2, 6, 10]:
                tds.pop(0)
            tds.pop(0)
            # The remaining cells are the days, Monday to Sunday.
            for day, day_c in enumerate(tds):
                row_arr = []
                if day_c.text != u' ':
                    td_str = day_c.__unicode__()
                    # rowspan="2" marks a course spanning two sections.
                    rowspan = 2 if 'rowspan="2"' in td_str else 1
                    # Strip the opening <td ...> tag and the closing "</td>".
                    td_main = re.sub(r'<td align="Center".*?>', '', td_str)[:-5]

                    # Several courses in one cell are separated by a double <br/>.
                    for text in td_main.split('<br/><br/>'):
                        course_arr = self._get_td_course_info(text)
                        if course_arr[0] and not re.match(pattern, course_arr[0]):
                            course_arr[1] = self._get_weeks_text(course_arr[1])
                            weeks_arr = self._get_weeks_arr(course_arr[1])
                            row_arr.append(course_arr + [rowspan, weeks_arr])
                self.schedule_list[day].append(row_arr)

    def get_schedule_dict(self):
        '''
        Return the timetable as dictionaries.

        The result has the keys 'schedule_term', 'schedule_year' and
        'schedule'; 'schedule' holds, per day and per section, a list of
        course dictionaries.  The structure is rebuilt from
        self.schedule_list on every call, so calling this method more than
        once does not duplicate sections.
        '''
        schedule_dict = []
        for day, day_schedule in enumerate(self.schedule_list):
            day_dict = []
            for section, section_schedule in enumerate(day_schedule):
                color = self.COlOR[(day * 3 + section + 1) % 5]
                day_dict.append([
                    {
                        "color": color,
                        "name": schedule[0],
                        "weeks_text": schedule[1],
                        "teacher": schedule[2],
                        "place": schedule[3],
                        "section": schedule[4],
                        "weeks_arr": schedule[5],
                        "time": self.time_list[schedule[4]][section]
                    }
                    # Entries emptied by a merge step are skipped.
                    for schedule in section_schedule if schedule
                ])
            schedule_dict.append(day_dict)
        self.schedule_dict = schedule_dict

        schedule_data = {
            'schedule_term': self.schedule_term,
            'schedule_year': self.schedule_year,
            'schedule': self.schedule_dict
        }
        return schedule_data

    def _get_weeks_text(self, class_time):
        '''
        Extract the weeks text of a course from its time string.

        With a falsy schedule_type the text between "{" and "}" is returned
        (IndexError when there are no braces).  Otherwise the handled
        shapes are:
            2节/周            -> "(1-18)" is appended
            2节/单周(7-7)     -> returned unchanged
            1-10,13-18(1,2)   -> everything before the first "(" is returned
        '''
        if not self.schedule_type:
            weeks_text = re.findall(r"{(.*)}", class_time)[0]
        else:
            if '2节/' in class_time:
                weeks_text = class_time if '(' in class_time else class_time + '(1-18)'
            else:
                weeks_text = class_time.split('(')[0]
        return weeks_text

    @staticmethod
    def _get_weeks_arr(weeks_text):
        """
        Turn a weeks text into the list of week numbers.

        The text is split on ","; a piece containing "a-b" contributes the
        weeks a..b, any other piece must be a plain integer.  When the text
        marks odd weeks ('单') or even weeks ('双') only every second week
        is kept, and the start of each range is first moved to the
        requested parity, so "双周(1-18)" gives 2, 4, ..., 18.

        :param weeks_text: weeks text as returned by _get_weeks_text
        :return: list of week numbers
        :raises ValueError: when a piece is neither a range nor an integer
        """
        weeks_arr = []
        odd_only = '单' in weeks_text
        even_only = '双' in weeks_text
        step = 2 if odd_only or even_only else 1
        for split_text in weeks_text.split(','):
            weeks = re.findall(r'(\d{1,2})-(\d{1,2})', split_text)

            if weeks:
                start, end = int(weeks[0][0]), int(weeks[0][1])
                if odd_only != even_only:
                    parity = 1 if odd_only else 0
                    aligned = start + (start - parity) % 2
                    # A range holding no week of the requested parity
                    # (e.g. a single explicit week) is kept as given.
                    if aligned <= end:
                        start = aligned
                weeks_arr += range(start, end + 1, step)
            else:
                weeks_arr += [int(split_text)]

        return weeks_arr

    @staticmethod
    def _get_td_course_info(text):
        '''
        Split the markup of one course inside a <td> into its fields.

        <font> tags and a leading <br/> are removed, the rest is split on
        <br/>, the course-type labels are dropped and at most four fields
        are kept.  When exactly three fields remain (no place given) an
        empty place is appended.
        '''
        text = re.sub(r'<[/]{0,1}font[^>]*?>', '', text)
        text = re.sub(r'^<br/>', '', text)

        if six.PY2:
            # Compatibility with the markup seen when running under Python 2.
            text = re.sub(r'</br></br></br>$', '', text)
            text = text.replace('<br>', '<br/>')

        info_arr = []
        for k in text.split('<br/>'):
            # Drop the course-type labels (elective / public elective / required).
            if k not in ['选修', '公选', '必修']:
                info_arr.append(k)

        info_arr = info_arr[:4:]
        if len(info_arr) == 3:
            # No place was given for this course.
            info_arr.append('')
        return info_arr


class ScheduleParse(BaseScheduleParse):

    ''' Timetable parser that also merges repeated course entries. '''

    def __init__(self, html, time_list, schedule_type=0):
        # Parse first, then merge what was parsed.
        BaseScheduleParse.__init__(self, html, time_list, schedule_type)
        self.merger_same_schedule()

    def merger_same_schedule(self):
        """
        Run the merge step on every day of self.schedule_list, in place.

        :return: None
        """
        for one_day in self.schedule_list:
            self._merger_day_schedule(one_day)

    def _merger_day_schedule(self, day_schedule):
        """
        Merge identical courses held in adjacent sections of one day.

        Each section is first cleaned up with _merger_section_schedule.
        Then, for every pair of neighbouring sections, a course whose name,
        weeks text, teacher and place are all equal in both sections gets
        the later entry's section count added to the earlier entry, and the
        later entry is replaced by an empty list.

        Example (two neighbouring sections):
            [[["英语", "2节/双周(14-14)", "姓名", "1-301", 2, [14]]],
             [["英语", "2节/双周(14-14)", "姓名", "1-301", 2, [14]]]]
        becomes
            [[["英语", "2节/双周(14-14)", "姓名", "1-301", 4, [14]]],
             [[]]]

        :param day_schedule: the sections of one day, each a list of courses
        """
        # Step 1: merge the entries inside each single section.
        for one_section in day_schedule:
            self._merger_section_schedule(one_section)

        # Step 2: merge equal entries of neighbouring sections.  The slice
        # copies only the outer list, the section lists are shared, so
        # writing into `lower` updates day_schedule itself.
        for upper, lower in zip(day_schedule, day_schedule[1:]):
            for earlier in upper:
                for pos, later in enumerate(lower):
                    if not earlier or not later:
                        continue
                    # Fields 0-3: name, weeks text, teacher, place.
                    if all(earlier[k] == later[k] for k in range(4)):
                        earlier[4] += later[4]
                        lower[pos] = []

    @staticmethod
    def _merger_section_schedule(section_schedule):
        """
        Merge entries of the same course inside one section, in place.

        For two entries with the same course name:
          * different weeks text and same place: the earlier entry's weeks
            and weeks text are appended to the later entry, and the earlier
            entry is replaced by an empty list;
          * same weeks text: the earlier entry is replaced by an empty list.

        Example:
            [["英语", "2节/单周(7-7)", "姓名", "1-301", 2, [7]],
             ["英语", "2节/双周(8-8)", "姓名", "1-301", 2, [8]]]
        becomes
            [[],
             ["英语", "2节/双周(8-8),2节/单周(7-7)", "姓名", "1-301", 2, [8, 7]]]

        :param section_schedule: the courses of one section
        """
        total = len(section_schedule)
        for i in range(total):
            for j in range(i + 1, total):
                # Re-read both slots every round: slot i may have been
                # emptied by an earlier round of this inner loop.
                first = section_schedule[i]
                second = section_schedule[j]
                if not first or not second:
                    continue
                if first[0] != second[0]:
                    continue

                same_time = first[1] == second[1]
                if not same_time and first[3] == second[3]:
                    # Same course and place at different weeks: fold the
                    # earlier entry into the later one.
                    second[5] += first[5]
                    second[1] += ',' + first[1]
                    section_schedule[i] = []
                elif same_time:
                    # Same course and weeks text: drop the earlier entry.
                    section_schedule[i] = []


1			# -- coding: utf-8 --
2			'''
3			@Time : 2016 - 2018
4			@Author : dairoot
5			@Email : [email protected]
6			@description: 课表解析
7			'''
8			from __future__ import absolute_import, unicode_literals
9
10			import re
11			import six
12			from bs4 import BeautifulSoup
13
14
15			class BaseScheduleParse():
			0 ignored issues – show Unused Code introduced 2018-07-19 02:41 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
16			''' 课表页面解析模块 '''
17			COlOR = ['green', 'blue', 'purple', 'red', 'yellow']
18
19			def __init__(self, html, time_list, schedule_type):
20			self.schedule_year = ''
21			self.schedule_term = ''
22			self.time_list = time_list
23			self.schedule_type = schedule_type
24			self.schedule_list = [[], [], [], [], [], [], []]
25			self.schedule_dict = [[], [], [], [], [], [], []]
26
27			soup = BeautifulSoup(html, "html.parser")
28			option_args = soup.find_all("option", {"selected": "selected"})
29			if option_args:
30			self.schedule_year = option_args[0].text
31			self.schedule_term = option_args[1].text
32			table = soup.find("table", {"id": "Table6"}) if \
33			schedule_type == 1 else soup.find("table", {"id": "Table1"})
34			trs = table.find_all('tr')
35			self.html_parse(trs)
36
37			def html_parse(self, trs):
38			"""
39			:param n+1: 为周几
40			:param i-1: 为第几节
41			:param arr: ["课程", "时间", "姓名", "地点", "节数", "周数数组"]
42			:param row_arr: 为周几第几节的课程信息
43			:param rowspan: 表示该课程有几节课
44			:return:
45			"""
46			pattern = r'^\([\u2E80-\u9FFF]{1,3}\d+\)'
47			# 每天最多有10节课, 数据从2到12, (i-1) 代表是第几节课 (偶数节不获取)
48			for i in range(2, 12, 2):
49			tds = trs[i].find_all("td")
50			# 去除无用数据，比如(上午, 第一节... 等等)
51			if i in [2, 6, 10]:
52			tds.pop(0)
53			tds.pop(0)
54			# 默认获取7天内的课表(周一到周日) tds 长度为7
55			for day, day_c in enumerate(tds):
56			row_arr = []
57			if day_c.text != u' ':
58			td_str = day_c.__unicode__()
59			rowspan = 2 if 'rowspan="2"' in td_str else 1
60			td_main = re.sub(r'<td align="Center".*?>', '', td_str)[:-5]
61
62			for text in td_main.split('<br/><br/>'):
63			course_arr = self._get_td_course_info(text)
64			if course_arr[0] and not re.match(pattern, course_arr[0]):
65			course_arr[1] = self._get_weeks_text(course_arr[1])
66			weeks_arr = self._get_weeks_arr(course_arr[1])
67			row_arr.append(course_arr + [rowspan, weeks_arr])
68			self.schedule_list[day].append(row_arr)
69
70			def get_schedule_dict(self):
71			''' 返回课表数据字典格式 '''
72
73			for day, day_schedule in enumerate(self.schedule_list):
74			for section, section_schedule in enumerate(day_schedule):
75			section_schedule_dict = []
76			color_index = (day * 3 + section + 1) % 5
77			for schedule in section_schedule:
78			if schedule:
79			section_schedule_dict.append({
80			"color": self.COlOR[color_index],
81			"name": schedule[0],
82			"weeks_text": schedule[1],
83			"teacher": schedule[2],
84			"place": schedule[3],
85			"section": schedule[4],
86			"weeks_arr": schedule[5],
87			"time": self.time_list[schedule[4]][section]
88			})
89			self.schedule_dict[day].append(section_schedule_dict)
90
91			schedule_data = {
92			'schedule_term': self.schedule_term,
93			'schedule_year': self.schedule_year,
94			'schedule': self.schedule_dict
95			}
96			return schedule_data
97
98			def _get_weeks_text(self, class_time):
99			''' 课程周数文本 '''
100			if not self.schedule_type:
101			weeks_text = re.findall(r"{(.*)}", class_time)[0]
102			else:
103			# 2节/周
104			# 2节/单周(7-7)
105			# 1-10,13-18(1,2)
106			if '2节/' in class_time:
107			weeks_text = class_time if '(' in class_time else class_time + '(1-18)'
108			else:
109			weeks_text = class_time.split('(')[0]
110			return weeks_text
111
112			@staticmethod
113			def _get_weeks_arr(weeks_text):
114			"""
115			将上课时间转成数组形式
116			:param class_time: 上课时间
117			:param weeks_text: 课程周数文本
118			:param weeks_arr: 上课周数数组
119			:return:
120			"""
121			weeks_arr = []
122			step = 2 if '单' in weeks_text or '双' in weeks_text else 1
123			for split_text in weeks_text.split(','):
124			weeks = re.findall(r'(\d{1,2})-(\d{1,2})', split_text)
125
126			if weeks:
127			weeks_arr += range(int(weeks[0][0]), int(weeks[0][1]) + 1, step)
128			else:
129			weeks_arr += [int(split_text)]
130
131			return weeks_arr
132
133			@staticmethod
134			def _get_td_course_info(text):
135			''' 获取td标签的课程信息 '''
136			text = re.sub(r'<[/]{0,1}font[^>]*?>', '', text)
137			text = re.sub(r'^<br/>', '', text)
138
139			if six.PY2:
140			# 以下兼容 python2 版本解析处理
141			text = re.sub(r'</br></br></br>$', '', text)
142			text = text.replace('<br>', '<br/>')
143
144			info_arr = []
145			for k in text.split('<br/>'):
146			if k not in ['选修', '公选', '必修']:
147			info_arr.append(k)
148
149			info_arr = info_arr[:4:]
150			if len(info_arr) == 3:
151			# 没有上课地点的情况
152			info_arr.append('')
153			return info_arr
154
155
156			class ScheduleParse(BaseScheduleParse):
			0 ignored issues – show Unused Code introduced 2018-07-19 02:41 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
157			''' 课表节数合并 '''
158
159			def __init__(self, html, time_list, schedule_type=0):
160			BaseScheduleParse.__init__(self, html, time_list, schedule_type)
161			self.merger_same_schedule()
162
163			def merger_same_schedule(self):
164			"""
165			:param day_schedule: 一天的课程
166			:param section_schedule: 一节课的课程
167			:return:
168			"""
169			for day_schedule in self.schedule_list:
170			self._merger_day_schedule(day_schedule)
171
172			def _merger_day_schedule(self, day_schedule):
173			"""
174			将同一天相邻的相同两节课合并
175			例如：[[["英语", "2节/双周(14-14)", "姓名", "1-301", "2", "[7,8]"],[...]],
176			[["英语", "2节/双周(14-14)", "姓名", "1-301", "2", "[7,8]"],[...]]]
177			合并为：课程节数修改
178			[[["英语", "2节/双周(14-14)", "姓名", "1-301", "4", "[7,8]"],[...]],
179			[[...]]]
180			"""
181			# 先合并同一节课的相同课程
182			for section_schedule in day_schedule:
183			self._merger_section_schedule(section_schedule)
184
185			# 再合并同一天相邻的相同两节课合并
186			day_slen = len(day_schedule)
187			for i in range(day_slen - 1):
188			for last_i, last_schedule in enumerate(day_schedule[i]):
189			for next_i, next_schedule in enumerate(day_schedule[i + 1]):
190			if last_schedule and next_schedule:
191			# 课程名上课地点上课时间教师名
192			if last_schedule[0] == next_schedule[0] and \
193			last_schedule[1] == next_schedule[1] and \
194			last_schedule[2] == next_schedule[2] and\
195			last_schedule[3] == next_schedule[3]:
196
197			day_schedule[i][last_i][4] += day_schedule[i + 1][next_i][4]
198			day_schedule[i + 1][next_i] = []
199
200			@staticmethod
201			def _merger_section_schedule(section_schedule):
202			"""
203			将同一节课的相同课程合并
204			例如：[["英语", "2节/单周(7-7)", "姓名", "1-301", "2", "[7]"],
205			["英语", "2节/双周(8-8)", "姓名", "1-301", "2", "[8]"]]
206			合并为：课程时间修改
207			[["英语", "2节/单周(7-7),2节/双周(8-8)", "姓名", "1-301", "2", "[7,8]"]]
208			"""
209			section_slen = len(section_schedule)
210			for i in range(section_slen):
211			for j in range(i + 1, section_slen):
212			if section_schedule[i] and section_schedule[j]:
213			# 课程名一样时
214			if section_schedule[i][0] == section_schedule[j][0]:
215			# 并且上课时间不同，上课地点一样时
216			if section_schedule[i][1] != section_schedule[j][1] and \
217			section_schedule[i][3] == section_schedule[j][3]:
218			section_schedule[j][5] += section_schedule[i][5]
219			section_schedule[j][1] += ',' + section_schedule[i][1]
220			section_schedule[i] = []
221
222			# 课程名和上课时间一样时将上一个赋为空
223			if section_schedule[i] and section_schedule[i][1] == section_schedule[j][1]:
224			section_schedule[i] = []
225

dairoot / school-api

Push — master ( 9faf20...545d25 )

school_api.client.api.utils.schedule_parse.ScheduleParse._merger_section_schedule() C

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like