GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

csvScriptBuilder.add_group_by()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 2

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 2
rs 10
1
import requests
2
import codecs
3
import os
4
import json
5
import uuid
6
import re
7
import csv
8
9
class csvScriptBuilder:
	"""Dump the constraints found on Wikidata property talk pages into a CSV file.

	For every property number from 1 to ``MAX_PROPERTY_NUMBER`` the edit view
	of the page ``Property_talk:P<number>`` is downloaded, every
	``{{Constraint:...}}`` template on it is parsed, and one CSV row
	``(uuid, property number, constraint name, JSON parameters)`` is written
	per constraint (or per entry of a ``list`` parameter).

	The only purpose is to build a CSV that fills a table with constraints;
	nonetheless the table layout is meant to suit the new way of storing
	constraints as statements on properties.
	"""

	MAX_PROPERTY_NUMBER = 4000

	CSV_FILE_NAME = "constraints.csv"

	CONSTRAINT_BEGIN_STRING = "{{Constraint:"

	# Value of ``list_parameter`` while no 'list' parameter has been seen.
	NO_LIST = 'NULL'

	# Compiled once instead of on every property_exists() call.
	TITLE_REGEX = re.compile('<title>(.*)</title>')

	# Markers stripped from the JSON blob, in the order they are removed.
	# NOTE(review): some of these look like partially HTML-escaped variants of
	# <nowiki>/</nowiki>; they are kept exactly as found - confirm against
	# real page content before changing them.
	NOWIKI_MARKERS = (
		"&lt;nowiki>",
		"&lt;/nowiki>",
		"&amp;lt;nowiki&amp;lt;",
		"&amp;lt;/nowiki&amp;gt;",
		"<nowiki>",
		"</nowiki>",
	)

	def __init__(self):
		"""Start with no parameters, no constraint name and no pending list."""
		self.parameters = {}
		self.constraint_name = ""
		# Initialised here so write_into_csv_file() works on a fresh instance.
		self.list_parameter = self.NO_LIST
		# Set by run(); may also be assigned by callers that supply a writer.
		self.csv_writer = None

	def find_next_seperator(self, constraint_parameters, equal_sign):
		"""Return the index just past the '|' that ends the current value.

		The current value starts after the '=' at index ``equal_sign``. Its
		terminating '|' is the last one located before the next '='. When
		there is no further '=' or no such '|', the value runs to the end of
		the string and ``len(constraint_parameters) + 1`` is returned, i.e.
		the position just past a virtual separator at the end.
		"""
		past_the_end = len(constraint_parameters) + 1
		next_equal_sign = constraint_parameters.find('=', equal_sign + 1)
		if next_equal_sign == -1:
			return past_the_end
		pipe = constraint_parameters.rfind('|', equal_sign, next_equal_sign)
		if pipe == -1:
			# The value itself contains '=': it still extends to the end.
			# (Returning len() here used to cut off the last character.)
			return past_the_end
		return pipe + 1

	def to_comma_seperated_string(self, values):
		"""Return ``values`` without braces, brackets, pipes and spaces, stripped."""
		for unwanted in ("{", "}", "|", " ", "[", "]"):
			values = values.replace(unwanted, "")
		return values.strip()

	def add_property(self, values):
		"""Store the stripped value as the 'property' parameter."""
		self.parameters['property'] = values.strip()

	def add_classes(self, values):
		"""Store the cleaned value as the 'class' parameter."""
		self.parameters['class'] = self.to_comma_seperated_string(values)

	def add_exceptions(self, values):
		"""Store the cleaned value, with ';' turned into ',', as 'known_exception'."""
		cleaned = self.to_comma_seperated_string(values)
		self.parameters['known_exception'] = cleaned.replace(";", ",")

	def add_group_by(self, values):
		"""Store the stripped value as the 'group_by' parameter."""
		self.parameters['group_by'] = values.strip()

	def add_items(self, values):
		"""Sort the comma separated entries into 'item' and 'snak' parameters.

		Entries starting with "Q" go to 'item'; "somevalue"/"novalue" (any
		case) go to 'snak'; everything else is dropped. A key is only set
		when at least one entry belongs to it.
		"""
		items = []
		snaks = []
		for element in self.to_comma_seperated_string(values).split(","):
			if element.startswith("Q"):
				items.append(element)
			elif element.lower() in ("somevalue", "novalue"):
				snaks.append(element)
		if items:
			self.parameters['item'] = ",".join(items)
		if snaks:
			self.parameters['snak'] = ",".join(snaks)

	def add_list(self, values):
		"""Handle a 'list' parameter.

		For the "Qualifiers" and "Mandatory qualifiers" constraints the
		cleaned list becomes the 'property' parameter; for all others it is
		kept in ``list_parameter`` and later written as one row per entry.
		"""
		cleaned = self.to_comma_seperated_string(values)
		if self.constraint_name in ("Qualifiers", "Mandatory qualifiers"):
			self.parameters['property'] = cleaned
		else:
			self.list_parameter = cleaned

	def set_constraint_name(self, values):
		"""Rename the constraint to 'Mandatory qualifiers' when the value is 'true'."""
		# Stripped like the other handlers so surrounding whitespace is ignored.
		if values.strip() == 'true':
			self.constraint_name = 'Mandatory qualifiers'

	def add_status(self, values):
		"""Mark the constraint as mandatory; the value itself is not inspected."""
		self.parameters['constraint_status'] = 'mandatory'

	def add_max(self, values):
		"""Store the stripped value as the 'maximum_quantity' parameter."""
		self.parameters['maximum_quantity'] = values.strip()

	def add_min(self, values):
		"""Store the stripped value as the 'minimum_quantity' parameter."""
		self.parameters['minimum_quantity'] = values.strip()

	def add_namespace(self, values):
		"""Store the stripped value as the 'namespace' parameter."""
		self.parameters['namespace'] = values.strip()

	def add_pattern(self, values):
		"""Store the stripped value as the 'pattern' parameter."""
		self.parameters['pattern'] = values.strip()

	def add_relation(self, values):
		"""Store the stripped value as the 'relation' parameter."""
		self.parameters['relation'] = values.strip()

	def write_one_line(self, property_number, constraint_name):
		"""Write the current parameters as a single row, then reset the state."""
		self.write_element_into_csv(property_number, constraint_name)
		self.reset_parameter()

	def write_multiple_lines(self, property_number, constraint_name):
		"""Write one row per ';'-separated entry of ``list_parameter``, then reset."""
		for line in self.list_parameter.split(';'):
			self.split_list_parameter(line)
			self.write_element_into_csv(property_number, constraint_name)
			# 'property' is overwritten by every entry, 'item' only by some.
			self.parameters.pop('item', None)
		self.reset_parameter()

	def write_into_csv_file(self, property_number, constraint_name):
		"""Write the parsed constraint: one row, or one per list entry."""
		if self.list_parameter != self.NO_LIST:
			self.write_multiple_lines(property_number, constraint_name)
		else:
			self.write_one_line(property_number, constraint_name)

	def write_element_into_csv(self, property_number, constraint_name):
		"""Write one row: fresh UUID, property number, constraint name, JSON blob."""
		json_blob_string = json.dumps(self.parameters)
		for marker in self.NOWIKI_MARKERS:
			json_blob_string = json_blob_string.replace(marker, "")
		self.csv_writer.writerow((
			str(uuid.uuid4()),
			str(property_number),
			constraint_name.strip(),
			json_blob_string,
		))

	def split_list_parameter(self, line):
		"""Split a "property:item" list entry into the two parameters.

		Without a ':' the whole entry becomes the 'property' parameter and
		'item' is left untouched.
		"""
		if ':' in line:
			colon = line.index(':')
			self.parameters['item'] = line[colon + 1:]
			self.parameters['property'] = line[:colon]
		else:
			self.parameters['property'] = line

	def reset_parameter(self):
		"""Forget all parameters and any pending list."""
		self.parameters = {}
		self.list_parameter = self.NO_LIST

	def get_constraint_part(self, property_talk_page):
		"""Return the part of the page that holds the constraint templates.

		The part starts at the first "{{Constraint:" and ends before the
		next "==" (or at the end of the page). Comments opened with
		"&lt;!--" and closed with "-->" are removed from it. An empty string
		is returned when the page has no constraint template at all.
		"""
		start = property_talk_page.find(self.CONSTRAINT_BEGIN_STRING)
		if start == -1:
			# Searching from index -1 used to return the page's last character.
			return ""

		end = property_talk_page.find("==", start)
		if end != -1:
			property_talk_page = property_talk_page[start:end]
		else:
			property_talk_page = property_talk_page[start:]

		# delete <!-- --> comments from site
		open_index = property_talk_page.find("&lt;!--")
		while open_index != -1:
			close_index = property_talk_page.find("-->", open_index)
			if close_index == -1:
				# Unterminated comment: leave the rest as it is.
				break
			property_talk_page = (property_talk_page[:open_index]
								  + property_talk_page[close_index + 3:])
			open_index = property_talk_page.find("&lt;!--")

		return property_talk_page

	def progress_print(self, number, maxNumber):
		"""Print "number/maxNumber" for every tenth number."""
		if number % 10 == 0:
			print(str(number) + "/" + str(maxNumber))

	def property_exists(self, propertyTalkPage):
		"""Return True when the page has a <title> without "Creating Property talk".

		Pages without a matching <title> element count as non-existent.
		"""
		match = self.TITLE_REGEX.search(propertyTalkPage)
		if match is None:
			return False
		return "Creating Property talk" not in match.group(0)

	def get_constraint_end_index(self, constraintPart):
		"""Return the index of the first brace of the template's closing "}}".

		``constraintPart`` is expected to start right after "{{Constraint:",
		so two braces are already open. Returns None when the braces never
		balance.
		"""
		# match brackets to find end of constraint
		count = 2
		for i, c in enumerate(constraintPart):
			if c == '{':
				count += 1
			elif c == '}':
				count -= 1
			if count == 0:
				return i - 1
		return None

	def split_constraint_block(self, constraint_part):
		"""Split off the first constraint template.

		Returns ``(constraint_string, remaining_constraint)``: the text
		between "{{Constraint:" and its closing braces, and everything from
		those closing braces on. Returns ``("", "")`` when no template is
		left.
		"""
		start_index = constraint_part.find(self.CONSTRAINT_BEGIN_STRING)
		if start_index == -1:
			return "", ""

		constraint_part = constraint_part[start_index + len(self.CONSTRAINT_BEGIN_STRING):]
		end_index = self.get_constraint_end_index(constraint_part)
		if end_index is None:
			# Unbalanced braces: the whole rest is both the constraint and the
			# remainder, so templates nested further in are still visited.
			return constraint_part, constraint_part
		return constraint_part[:end_index], constraint_part[end_index:]

	# Maps a template parameter name to the handler that stores its value.
	# The handlers are plain functions here, so they are called with an
	# explicit ``self``.
	call_method = {
		'base_property': add_property,
		'class': add_classes,
		'classes': add_classes,
		'exceptions': add_exceptions,
		'group by': add_group_by,
		'group property': add_group_by,
		'item': add_items,
		'items': add_items,
		'list': add_list,
		'mandatory': add_status,
		'max': add_max,
		'min': add_min,
		'namespace': add_namespace,
		'pattern': add_pattern,
		'property': add_property,
		'relation': add_relation,
		'required': set_constraint_name,
		'value': add_items,
		'values': add_items,
	}

	def split_parameters(self, constraint_parameters):
		"""Split off the first "name=value" pair.

		Returns ``(parameter_name, parameter_value, remaining_parameters)``;
		the name is stripped, the value is returned as found.
		"""
		equal_sign_pos = constraint_parameters.find('=')
		next_seperator = self.find_next_seperator(constraint_parameters, equal_sign_pos)

		parameter_name = constraint_parameters[:equal_sign_pos].strip()
		# next_seperator points just past the '|', so the value ends one before.
		parameter_value = constraint_parameters[equal_sign_pos + 1:next_seperator - 1]
		remaining_constraint_parameters = constraint_parameters[next_seperator:]

		return parameter_name, parameter_value, remaining_constraint_parameters

	def add_all_parameters(self, constraint_parameters):
		"""Feed every "name=value" pair to its handler; unknown names are skipped."""
		while constraint_parameters is not None and '=' in constraint_parameters:
			p_name, p_value, constraint_parameters = self.split_parameters(constraint_parameters)
			handler = self.call_method.get(p_name)
			# Only the lookup may miss; errors raised by a handler propagate.
			if handler is not None:
				handler(self, p_value)

	def process_constraint_part(self, constraint_part, property_number):
		"""Parse every constraint template in ``constraint_part`` and write it out."""
		constraint_string, remaining_constraint = self.split_constraint_block(constraint_part)
		while constraint_string != "":
			self.constraint_name = None
			self.list_parameter = self.NO_LIST

			delimiter_index = constraint_string.find('|')
			if delimiter_index == -1:
				# Template without parameters: the whole string is the name.
				self.constraint_name = constraint_string
			else:
				self.constraint_name = constraint_string[:delimiter_index]
				self.add_all_parameters(constraint_string[delimiter_index + 1:])

			self.write_into_csv_file(property_number, self.constraint_name)

			constraint_string, remaining_constraint = self.split_constraint_block(remaining_constraint)

	def get_property_talk_page(self, property_number):
		"""Download the edit view of Property_talk:P<property_number> as text."""
		# NOTE(review): no timeout is passed, so a stalled connection blocks the run.
		url = "http://www.wikidata.org/w/index.php?title=Property_talk:P" + \
			  str(property_number) + "&action=edit"
		return requests.get(url).text

	def process_property_talk_page(self, property_number):
		"""Fetch one property talk page and write its constraints, if it exists."""
		property_talk_page = self.get_property_talk_page(property_number)
		if self.property_exists(property_talk_page):
			constraintPart = self.get_constraint_part(property_talk_page)
			self.process_constraint_part(constraintPart, property_number)

	def _open_csv_file(self):
		"""Open CSV_FILE_NAME for writing in the mode the csv module needs."""
		import sys
		if sys.version_info[0] >= 3:
			# Python 3: text mode, newline translation left to the csv module.
			return open(self.CSV_FILE_NAME, 'w', newline='', encoding='utf-8')
		# Python 2: the csv module writes bytes.
		return open(self.CSV_FILE_NAME, 'wb')

	def run(self):
		"""Write the constraints of properties 1..MAX_PROPERTY_NUMBER to the CSV file."""
		with self._open_csv_file() as csv_file:
			self.csv_writer = csv.writer(csv_file)
			for property_number in range(1, self.MAX_PROPERTY_NUMBER + 1):
				self.progress_print(property_number, self.MAX_PROPERTY_NUMBER)
				self.process_property_talk_page(property_number)
278
279
280
def main():
	"""Create a csvScriptBuilder and let it write the constraints CSV."""
	csvScriptBuilder().run()


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
	main()
285