AbstractGenerator._write_doc_aini2016()   F
last analyzed

Complexity

Conditions 20

Size

Total Lines 167

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 20
c 2
b 0
f 0
dl 0
loc 167
rs 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

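Commonly applied refactorings include Extract Method, which moves a self-contained (often commented) part of a long method into its own well-named method.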

Complexity

Complex code like AbstractGenerator._write_doc_aini2016() often does a lot of different things. To break it down, we need to identify a cohesive component within its class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
"""
3
Generate abstract document (docx) file from table (xlsx)
4
by nebula
5
6
Dependency: pandas, xlrd, python-docx, pillow
7
"""
8
from PIL import Image
9
import pandas as pd
10
import docx
11
import math
12
import re
13
import os
14
15
16
class AbstractGenerator:
    """Build a docx abstract document from records read from an xlsx table.

    Call ``read_xlsx()`` first and ``write_docx()`` afterwards.  Every row of
    the table is written into its own document section, using the layout
    selected by ``template_type``.
    """

    # One parenthesised marker such as "(1)"; the group captures its content.
    _PAREN_MARK = re.compile(r'\((\w+)\)')

    def __init__(self, image_dir='', template_type='aini2016'):
        """Store the settings and compile the regular expressions.

        :param image_dir: directory that figure file names are joined onto.
        :param template_type: ``'aini2016'`` selects ``_write_doc_aini2016``;
            any other value selects ``_write_doc_jscpb2016``.
        """
        self.records = None  # table set by read_xlsx()
        self.image_dir = image_dir
        self.template_type = template_type
        # "Name(1)(2)" -> group 1: name, group 2: trailing "(..)" markers.
        self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
        # "(1)(2)Institute" -> group 1: leading markers, group 2: the rest.
        self.exreg4affiliation = re.compile(r'^((?:\(.+\))*)(.+)$')
        self.exreg4super = re.compile(r'(\(\w+\))')
        self.exreg4italic = re.compile(r'(\<i\>.*?\</i\>)')
        self.exreg4sup = re.compile(r'(\<sup\>.*?\</sup\>)')
        self.exreg4sub = re.compile(r'(\<sub\>.*?\</sub\>)')
        self.exreg4tags = re.compile(r'(\<.*?\>.*?\</.*?\>)')
        self.exreg4tag_strip = re.compile(r'<[^>]*?>')
        self.preferredImageMaxWidth = 14  # cm
        self.preferredImageMaxHeight = 8.5  # cm
        self.preferredImageDpi = 72

    def _insert_image(self, filename, image_filename):
        """Append a picture to every paragraph of *filename* containing '[[FIGURE]]'.

        The document is saved back to *filename*.  The marker text itself is
        left in place.

        :param filename: path of the docx file to modify in place.
        :param image_filename: path of the picture to insert.
        """
        doc = docx.Document(filename)

        for paragraph in doc.paragraphs:
            if '[[FIGURE]]' in paragraph.text:
                run = paragraph.add_run()
                # A run cannot contain paragraphs (python-docx Run has no
                # add_paragraph); line breaks separate the picture instead.
                run.add_break()
                run.add_picture(image_filename, width=docx.shared.Pt(300))
                run.add_break()

        doc.save(filename)

    @staticmethod
    def _to_text(value):
        """Return *value* as ``str``; ``None`` and float NaN become ``''``."""
        if value is None:
            return ''
        if isinstance(value, float) and math.isnan(value):
            return ''
        return str(value)

    @staticmethod
    def _empty(text):
        """Return True when *text* is ``None``, float NaN, or blank after stripping."""
        return AbstractGenerator._to_text(text).strip() == ''

    def _to_array(self, text, delim):
        """Split *text* on *delim* and drop the blank pieces.

        Kept pieces are returned unstripped.  An empty cell (``None``, NaN or
        blank text) yields ``[]``.
        """
        if self._empty(text):
            return []
        return [piece for piece in self._to_text(text).split(delim)
                if piece.strip()]

    @staticmethod
    def _remove_parentheses(text):
        """Turn markers such as ``'(1)(2)'`` into ``'1, 2'``.

        The text is split on ``(word)`` markers; every non-blank piece
        (marker content as well as text between markers) is stripped and the
        pieces are joined with ``', '``.
        """
        pieces = (piece.strip()
                  for piece in AbstractGenerator._PAREN_MARK.split(text))
        return ', '.join(piece for piece in pieces if piece)

    @staticmethod
    def _get_image_size(pixel, dpi):
        """Convert a length in pixels at *dpi* dots per inch to centimetres."""
        return pixel / dpi * 2.54

    def _get_preferred_image_size(self, fpath):
        """Return ``(width, height)`` for the image at *fpath* as ``docx.shared.Cm``.

        The physical size is derived from the pixel size and the density
        recorded in the image, then scaled down proportionally to fit within
        ``preferredImageMaxWidth`` x ``preferredImageMaxHeight`` centimetres.
        """
        fallback = self.preferredImageDpi
        # The context manager closes the file even if reading metadata fails.
        with Image.open(fpath) as img:
            pixel_width, pixel_height = img.size
            dpi = (fallback, fallback)
            if 'dpi' in img.info:
                dpi = img.info['dpi']
            # NOTE(review): 'jfif_density' is used as dots per inch without
            # looking at 'jfif_unit' -- confirm this is intended.
            if 'jfif_density' in img.info:
                dpi = img.info['jfif_density']

        # A recorded density of 0 would divide by zero; use the default.
        width = self._get_image_size(pixel_width, dpi[0] or fallback)
        height = self._get_image_size(pixel_height, dpi[1] or fallback)

        if width > self.preferredImageMaxWidth:
            height = height * self.preferredImageMaxWidth / width
            width = self.preferredImageMaxWidth
        if height > self.preferredImageMaxHeight:
            width = width * self.preferredImageMaxHeight / height
            height = self.preferredImageMaxHeight
        return docx.shared.Cm(width), docx.shared.Cm(height)

    def _apply_it_sup_sub(self, doc, body, debug=False):
        """Add *body* to *doc* as one paragraph, honouring <i>, <sup> and <sub>.

        *body* is split into tagged and untagged pieces; each piece becomes a
        run whose italic / superscript / subscript flags reflect its tag.

        :param debug: when true the tags are kept in the run text.
        :return: the new paragraph.
        """
        p = doc.add_paragraph()
        for piece in self.exreg4tags.split(body):
            text = piece if debug else self.exreg4tag_strip.sub('', piece)
            run = p.add_run(text)
            run.italic = bool(self.exreg4italic.match(piece))
            run.font.superscript = bool(self.exreg4sup.match(piece))
            run.font.subscript = bool(self.exreg4sub.match(piece))
        return p

    def read_xlsx(self, filename):
        """Load the table from *filename* into ``self.records``."""
        print('Reading: %s' % filename)
        # Close the workbook once it is parsed instead of leaking the handle.
        with pd.ExcelFile(filename) as workbook:
            self.records = workbook.parse()

    def write_docx(self, filename, template=None):
        """Render every loaded record into a docx file saved as *filename*.

        :param template: optional docx file used as the starting document.
        :raises RuntimeError: if no table has been loaded with ``read_xlsx``.
        """
        if self.records is None:
            raise RuntimeError('read_xlsx() must be called before write_docx()')
        print('Writing: %s' % filename)

        doc = docx.Document(template) if template is not None else docx.Document()
        if self.template_type == 'aini2016':
            write_record = self._write_doc_aini2016
        else:
            write_record = self._write_doc_jscpb2016

        for position, index in enumerate(self.records.index):
            # The first record reuses the initial section; every later record
            # starts a new section on a new page.
            if position == 0:
                section = doc.sections[0]
            else:
                section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)
            section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
            section.page_height = docx.shared.Mm(297)
            section.page_width = docx.shared.Mm(210)
            section.top_margin = docx.shared.Mm(20)
            section.right_margin = docx.shared.Mm(20)
            section.left_margin = docx.shared.Mm(20)
            section.bottom_margin = docx.shared.Mm(15)
            write_record(doc, self.records.loc[index])

        doc.save(filename)

    def _write_doc_jscpb2016(self, doc, record):
        """Append one record to *doc* in the 'jscpb2016' layout.

        Reads the ``title``, ``authors``, ``affiliations``, ``abstract`` and
        ``keywords`` attributes of *record*.
        """
        print(record.title)

        # Title
        p = self._apply_it_sup_sub(doc, record.title)
        for run in p.runs:
            run.font.size = docx.shared.Pt(12)
            run.bold = True

        # Authors: splitting on a capturing pattern puts the "(n)" markers at
        # the odd positions; those are written as superscript.
        p = doc.add_paragraph()
        for position, piece in enumerate(self.exreg4super.split(record.authors)):
            run = p.add_run(piece)
            if position & 1:
                run.font.superscript = True

        # Affiliations (an empty text produces no run, so iterate the runs
        # rather than indexing the first one)
        p = doc.add_paragraph(record.affiliations)
        for run in p.runs:
            run.font.size = docx.shared.Pt(9)
            run.italic = True

        # Abstract Body
        self._apply_it_sup_sub(doc, record.abstract)

        # keywords
        p = doc.add_paragraph('Keywords: ')
        p.add_run(record.keywords).italic = True

    def _split_author(self, author):
        """Split one author line into ``(name, marks)``.

        ``'Name(1)(2)'`` gives ``('Name', '1, 2')``.  A line that does not
        follow that shape is returned whole as the name with no marks.
        """
        m = self.exreg4author.match(author)
        if m is None:
            return author.strip(), ''
        return m.group(1).strip(), self._remove_parentheses(m.group(2).strip())

    def _add_aini2016_byline(self, doc, record, authors):
        """Add the centred paragraph holding authors, affiliations and e-mail."""
        p = doc.add_paragraph()
        p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
        p.paragraph_format.line_spacing = docx.shared.Pt(12)
        p.paragraph_format.space_after = docx.shared.Pt(12)

        # Authors: bold names, affiliation marks as bold superscript.
        for position, (name, marks) in enumerate(authors):
            if position:
                p.add_run(', ').bold = True
            # Non-breaking spaces keep each name on one line.
            p.add_run(name.replace(' ', '\u00A0')).bold = True
            if marks != '':
                r = p.add_run('\u00A0' + marks)
                r.bold = True
                r.font.superscript = True
        p.add_run('\n')

        # Affiliations: superscript marks precede each affiliation text.
        affiliations = self._to_array(record['Affiliation'], '\n')
        for position, affiliation in enumerate(affiliations):
            m = self.exreg4affiliation.match(affiliation)
            if position:
                p.add_run(', ')
            marks = self._remove_parentheses(m.group(1).strip())
            if marks != '':
                p.add_run(marks + '\u00A0').font.superscript = True
            p.add_run(m.group(2).strip())
        p.add_run('\n' + self._to_text(record['e-mail']))
        return p

    def _add_labelled_paragraphs(self, doc, items, label, last):
        """Add one justified paragraph per item, the first prefixed by a bold *label*.

        :return: the last paragraph added, or *last* when *items* is empty.
        """
        for position, item in enumerate(items):
            last = doc.add_paragraph()
            last.paragraph_format.line_spacing = docx.shared.Pt(10)
            last.paragraph_format.space_after = docx.shared.Pt(0)
            last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            if position == 0:
                last.add_run(label).bold = True
            last.add_run(item)
        return last

    def _add_aini2016_figure(self, doc, record):
        """Add the centred figure and its comment lines; return the last paragraph."""
        img_fpath = os.path.join(self.image_dir, record['Figure file Name'])
        width, _ = self._get_preferred_image_size(img_fpath)
        doc.add_picture(img_fpath, width=width)
        picture_paragraph = doc.paragraphs[-1]
        picture_paragraph.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER

        comments = self._to_array(record['Figure comment'], '\n')
        return self._add_labelled_paragraphs(doc, comments, 'Figure: ',
                                             picture_paragraph)

    def _add_aini2016_references(self, doc, record, last):
        """Add the 'References:' heading and one paragraph per reference.

        :return: the last paragraph added, or *last* when there are none.
        """
        items = self._to_array(record['References'], '\n')
        if items:
            heading = doc.add_paragraph()
            heading.paragraph_format.line_spacing = docx.shared.Pt(11)
            heading.paragraph_format.space_after = docx.shared.Pt(0)
            heading.add_run('References:').bold = True
        for item in items:
            last = doc.add_paragraph()
            last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            last.paragraph_format.line_spacing = docx.shared.Pt(10)
            last.paragraph_format.space_after = docx.shared.Pt(0)
            last.add_run(item)
        return last

    def _write_doc_aini2016(self, doc, record):
        """Append one record to *doc* in the 'aini2016' layout.

        Reads the columns 'Title', 'Name', 'Affiliation', 'e-mail', 'DOI',
        'Abstract', 'Figure file Name', 'Figure comment', 'References',
        'Acknowledgement', 'Funding' and 'Program No. Long' of *record*.
        Empty (NaN) text cells are rendered as empty text.
        """
        print('"%s"' % record['Title'])

        font = doc.styles['Normal'].font
        font.size = docx.shared.Pt(10)
        font.name = 'Times New Roman'

        title = self._to_text(record['Title']).strip()
        doi = self._to_text(record['DOI']).strip()
        program_no = self._to_text(record['Program No. Long']).strip()
        authors = [self._split_author(line)
                   for line in self._to_array(record['Name'], '\n')]

        # Title
        p = doc.add_paragraph()
        p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
        p.paragraph_format.space_before = docx.shared.Pt(25)
        p.paragraph_format.space_after = docx.shared.Pt(14)
        r = p.add_run(title)
        r.font.size = docx.shared.Pt(12)
        r.bold = True
        r.italic = True

        # Authors, affiliations and e-mail
        self._add_aini2016_byline(doc, record, authors)

        # DOI
        # From here on `last` is the most recently added paragraph.  The
        # spacing set after each block lands on it even when that block is
        # empty, so the spacing then applies to the preceding paragraph.
        last = doc.add_paragraph('DOI:' + doi)
        last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
        last.paragraph_format.space_after = docx.shared.Pt(12)

        # Abstract Body
        for position, item in enumerate(self._to_array(record['Abstract'], '\n')):
            last = doc.add_paragraph(item)
            last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            last.paragraph_format.line_spacing = docx.shared.Pt(11)
            last.paragraph_format.space_after = docx.shared.Pt(2)
            if position:
                last.paragraph_format.first_line_indent = docx.shared.Pt(12)
        last.paragraph_format.space_after = docx.shared.Pt(12)

        # Figure
        if not self._empty(record['Figure file Name']):
            last = self._add_aini2016_figure(doc, record)
        last.paragraph_format.space_after = docx.shared.Pt(14)

        # References
        last = self._add_aini2016_references(doc, record, last)
        last.paragraph_format.space_after = docx.shared.Pt(10)

        # Acknowledgement
        items = self._to_array(record['Acknowledgement'], '\n')
        last = self._add_labelled_paragraphs(doc, items, 'Acknowledgement: ', last)
        last.paragraph_format.space_after = docx.shared.Pt(10)

        # Funding
        items = self._to_array(record['Funding'], '\n')
        last = self._add_labelled_paragraphs(doc, items, 'Funding: ', last)
        last.paragraph_format.space_after = docx.shared.Pt(10)

        # Citation
        p = doc.add_paragraph()
        p.paragraph_format.line_spacing = docx.shared.Pt(10)
        p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
        p.add_run('Citation: ').bold = True
        names = ', '.join(name for name, _ in authors)
        p.add_run(names + ' (2016). ' + title.replace('\n', ' ') + '. ')
        p.add_run('Advances in Neuroinformatics IV. ').italic = True
        p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: ' + program_no
                  + '. DOI:' + doi)
355
356
357
if __name__ == '__main__':
358
    img_dir = './image'
359
    input_xlsx = 'input.xlsx'
360
    output_docx = 'output.docx'
361
    template_docx = './template/aini2016.docx'
362