Completed
Push — master ( 8c3d23...8de117 )
by Daisuke
9s
created

AbstractGenerator   B

Complexity

Total Complexity 40

Size/Duplication

Total Lines 261
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 261
rs 8.2608
wmc 40

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 8 1
B write_docx() 0 28 5
A _insert_image() 0 12 3
A read_xlsx() 0 4 1
F _write_doc_aini2016() 0 155 18
B _toArray() 0 5 5
B _write_doc_jscpb2016() 0 28 3
A _removeParentheses() 0 12 4

How to fix   Complexity   

Complex Class

Complex classes like AbstractGenerator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
"""
3
Generate abstract document (docx) file from table (xlsx)
4
by nebula
5
6
Dependency: pandas, xlrd, python-docx
7
"""
8
import pandas as pd
9
import docx
10
import math
11
import re
12
import os
13
14
15
class AbstractGenerator:
    """Build an abstract booklet (docx) from a table of records (xlsx).

    Typical use: construct, call ``read_xlsx`` to load the table, then call
    ``write_docx`` to emit one document section per table row.  The row
    layout is chosen by ``template_type``: ``'aini2016'`` selects
    ``_write_doc_aini2016``; any other value selects
    ``_write_doc_jscpb2016``.
    """

    def __init__(self, image_dir='', template_type='aini2016'):
        """Store the configuration and pre-compile the parsing patterns.

        image_dir     -- directory joined in front of each figure file name.
        template_type -- layout selector, see the class docstring.
        """
        self.records = None  # table set by read_xlsx()
        self.image_dir = image_dir
        self.template_type = template_type
        # "Name(1)(2)"  -> group 1: name, group 2: trailing "(..)" markers
        self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
        # "(1)(2)Place" -> group 1: leading "(..)" markers, group 2: the rest
        self.exreg4affiliation = re.compile(r'^((?:\(.+\))*)(.+)$')
        # a single "(word)" marker, captured including its parentheses
        self.exreg4super = re.compile(r'(\(\w+\))')
        # an "<i>word</i>" span
        self.exreg4italic = re.compile(r'(\<i\>\w+\</i\>)')
        # a single "(word)" marker, capturing only the inner word
        self._exreg4marker = re.compile(r'\((\w+)\)')

    def _insert_image(self, filename, image_filename):
        """Append a picture to every '[[FIGURE]]' paragraph of a docx file.

        The document is loaded from ``filename`` and saved back to the same
        path.  The placeholder text itself is left in place.
        """
        doc = docx.Document(filename)

        for paragraph in doc.paragraphs:
            if '[[FIGURE]]' not in paragraph.text:
                continue
            #paragraph.text = ''
            run = paragraph.add_run()
            # A run cannot contain paragraphs (python-docx has no
            # Run.add_paragraph), so separate the picture with line breaks.
            run.add_break()
            run.add_picture(image_filename, width=docx.shared.Pt(300))
            run.add_break()

        doc.save(filename)

    def _toArray(self, text, delim):
        """Split a cell value on ``delim`` and drop blank items.

        Returns ``[]`` for a missing cell (``None`` or a float NaN).
        Non-string values are converted with ``str()`` before splitting.
        Kept items are returned unstripped.
        """
        if text is None:
            return []
        if isinstance(text, float) and math.isnan(text):
            return []
        if not isinstance(text, str):
            text = str(text)
        return [item for item in text.split(delim) if item.strip()]

    def _removeParentheses(self, text):
        """Turn "(1)(2)" into "1, 2".

        The text is split on "(word)" markers; the marker contents and any
        non-blank text between markers are stripped and joined with ', '.
        """
        pieces = (piece.strip() for piece in self._split_markers(text))
        return ', '.join(piece for piece in pieces if piece)

    def _split_markers(self, text):
        """Split ``text`` on "(word)" markers, keeping the marker contents."""
        # Fall back to a fresh pattern for instances created without
        # __init__ (e.g. via __new__).
        pattern = getattr(self, '_exreg4marker', None)
        if pattern is None:
            pattern = re.compile(r'\((\w+)\)')
        return pattern.split(text)

    def _split_author(self, author):
        """Split one author entry into ``(name, markers)``.

        ``name`` is the stripped author name and ``markers`` the affiliation
        markers formatted by ``_removeParentheses`` ('' when there are none).
        An entry that does not fit the "Name(1)(2)" pattern is returned
        whole as the name with no markers instead of raising.
        """
        author = author.strip()
        m = self.exreg4author.match(author)
        if m is None:
            return author, ''
        return m.group(1).strip(), self._removeParentheses(m.group(2).strip())

    def read_xlsx(self, filename):
        """Load the first sheet of ``filename`` into ``self.records``."""
        print('Reading: %s' % filename)
        # Context manager so the workbook handle is closed after parsing.
        with pd.ExcelFile(filename) as exls:
            self.records = exls.parse()

    def write_docx(self, filename, template=None):
        """Write every loaded record to ``filename``, one section per record.

        ``template`` is an optional docx file used as the starting document.
        ``read_xlsx`` must have been called first so ``self.records`` is set.
        """
        print('Writing: %s' % filename)

        doc = docx.Document(template) if template is not None else docx.Document()

        if self.template_type == 'aini2016':
            write_record = self._write_doc_aini2016
        else:
            write_record = self._write_doc_jscpb2016

        for position, i in enumerate(self.records.index):
            if position == 0:
                # The first record reuses the section the document starts with.
                section = doc.sections[0]
            else:
                section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)
            # Portrait page, 210 mm x 297 mm.
            section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
            section.page_height = docx.shared.Mm(297)
            section.page_width = docx.shared.Mm(210)
            section.top_margin = docx.shared.Mm(20)
            section.right_margin = docx.shared.Mm(20)
            section.left_margin = docx.shared.Mm(20)
            section.bottom_margin = docx.shared.Mm(15)
            write_record(doc, self.records.loc[i])

        doc.save(filename)

    def _write_doc_jscpb2016(self, doc, record):
        """Append one record in the 'jscpb2016' layout.

        Reads the columns 'title', 'authors', 'affiliations', 'abstract'
        and 'keywords' from ``record``.
        """
        print('"%s"' % record['title'])

        # Title -- the run is created explicitly so an empty title does not
        # leave the paragraph without a run to format.
        r = doc.add_paragraph().add_run(record['title'])
        r.font.size = docx.shared.Pt(12)
        r.bold = True

        # Authors -- splitting on a capturing pattern puts the "(n)" markers
        # at the odd positions; those are rendered as superscript.
        p = doc.add_paragraph()
        for j, piece in enumerate(self.exreg4super.split(record['authors'])):
            run = p.add_run(piece)
            if j % 2 == 1:
                run.font.superscript = True

        # Affiliations
        r = doc.add_paragraph().add_run(record['affiliations'])
        r.font.size = docx.shared.Pt(9)
        r.italic = True

        # Abstract Body
        doc.add_paragraph(record['abstract'])

        # keywords
        p = doc.add_paragraph('Keywords: ')
        p.add_run(record['keywords']).italic = True

    def _add_labeled_paragraphs(self, doc, items, label, last_paragraph):
        """Append one justified paragraph per item, labelling the first.

        The first paragraph starts with ``label`` in bold.  Returns the last
        paragraph added, or ``last_paragraph`` unchanged when ``items`` is
        empty, so the caller can set the trailing spacing on it.
        """
        for position, item in enumerate(items):
            p = doc.add_paragraph()
            p.paragraph_format.line_spacing = docx.shared.Pt(10)
            p.paragraph_format.space_after = docx.shared.Pt(0)
            p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            if position == 0:
                p.add_run(label).bold = True
            p.add_run(item)
            last_paragraph = p
        return last_paragraph

    def _write_doc_aini2016(self, doc, record):
        """Append one record in the 'aini2016' layout.

        Reads the columns 'Title', 'Name', 'Affiliation', 'e-mail', 'DOI',
        'Abstract', 'Figure file Name', 'Figure comment', 'References',
        'Acknowledgement', 'Funding' and 'Program No. Long' from ``record``.
        Multi-line cells are split on newlines.

        NOTE: after each multi-line section the trailing ``space_after`` is
        set on the most recently added paragraph; when a section is empty
        that is the last paragraph of the preceding section.
        """
        print('"%s"' % record['Title'])
        pt = docx.shared.Pt
        align = docx.enum.text.WD_ALIGN_PARAGRAPH

        font = doc.styles['Normal'].font
        font.size = pt(10)
        font.name = 'Lucida Grande'

        # Program Number (currently disabled)
        #p = doc.add_paragraph()
        #p.paragraph_format.line_spacing = docx.shared.Pt(12)
        #p.paragraph_format.space_after = docx.shared.Pt(5)
        #r = p.add_run(record['Program No.'].strip())

        # Title
        p = doc.add_paragraph()
        p.alignment = align.CENTER
        p.paragraph_format.space_before = pt(25)
        p.paragraph_format.space_after = pt(14)
        r = p.add_run(record['Title'].strip())
        r.font.size = pt(12)
        r.bold = True
        r.italic = True

        # Authors
        p = doc.add_paragraph()
        p.alignment = align.CENTER
        p.paragraph_format.line_spacing = pt(12)
        p.paragraph_format.space_after = pt(12)
        authors = [self._split_author(author)
                   for author in self._toArray(record['Name'], '\n')]
        for position, (name, num) in enumerate(authors):
            if position:
                p.add_run(', ').bold = True
            # Non-breaking spaces keep each name on a single line.
            p.add_run(name.replace(' ', '\u00A0')).bold = True
            if num:
                r = p.add_run(num)
                r.bold = True
                r.font.superscript = True
        p.add_run('\n')

        # Affiliation (continues in the authors paragraph)
        affiliations = self._toArray(record['Affiliation'], '\n')
        for position, affiliation in enumerate(affiliations):
            m = self.exreg4affiliation.match(affiliation)
            if position:
                p.add_run(', ')
            num = self._removeParentheses(m.group(1).strip())
            name = m.group(2).strip()
            if num:
                p.add_run(num + ' ').font.superscript = True
            p.add_run(name)
        p.add_run('\n' + record['e-mail'])

        # DOI
        p = doc.add_paragraph('DOI:' + record['DOI'].strip())
        p.alignment = align.CENTER
        p.paragraph_format.space_after = pt(12)

        # Abstract Body
        for item in self._toArray(record['Abstract'], '\n'):
            p = doc.add_paragraph(item)
            p.alignment = align.JUSTIFY
            p.paragraph_format.line_spacing = pt(10)
        p.paragraph_format.space_after = pt(12)

        # Figure
        doc.add_picture(os.path.join(self.image_dir, record['Figure file Name']))
        p = doc.paragraphs[-1]
        p.alignment = align.CENTER

        # Figure Comment
        p = self._add_labeled_paragraphs(
            doc, self._toArray(record['Figure comment'], '\n'), 'Figure: ', p)
        p.paragraph_format.space_after = pt(14)

        # References -- a bold heading paragraph, then one paragraph per entry
        for position, item in enumerate(self._toArray(record['References'], '\n')):
            if position == 0:
                p = doc.add_paragraph()
                p.paragraph_format.line_spacing = pt(11)
                p.paragraph_format.space_after = pt(0)
                p.add_run('References:').bold = True
            p = doc.add_paragraph()
            p.alignment = align.JUSTIFY
            p.paragraph_format.line_spacing = pt(10)
            p.paragraph_format.space_after = pt(0)
            p.add_run(item)
        p.paragraph_format.space_after = pt(10)

        # Acknowledgement
        p = self._add_labeled_paragraphs(
            doc, self._toArray(record['Acknowledgement'], '\n'),
            'Acknowledgement: ', p)
        p.paragraph_format.space_after = pt(10)

        # Funding
        p = self._add_labeled_paragraphs(
            doc, self._toArray(record['Funding'], '\n'), 'Funding: ', p)
        p.paragraph_format.space_after = pt(10)

        # Citation
        p = doc.add_paragraph()
        p.paragraph_format.line_spacing = pt(10)
        p.alignment = align.JUSTIFY
        p.add_run('Citation: ').bold = True
        author_tmp = ', '.join(name for name, _ in authors)
        p.add_run(author_tmp + ' (2016). ' + record['Title'] + '. ')
        p.add_run('Advances in Neuroinformatics IV. ').italic = True
        p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: ' + record['Program No. Long'].strip() + '. DOI:' + record['DOI'].strip())
276
277
278
if __name__ == '__main__':
279
    img_dir = './image'
280
    input_xlsx = 'input.xlsx'
281
    output_docx = 'output.docx'
282
    template_docx = './template/aini2016.docx'
283