Completed
Pull Request — master (#2)
by Yoshihiro
01:18
created

AbstractGenerator._empty()   A

Complexity

Conditions 3

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
"""
3
Generate abstract document (docx) file from table (xlsx)
4
by nebula
5
6
Dependency: pandas, xlrd, python-docx, pillow
7
"""
8
from PIL import Image
9
import pandas as pd
10
import docx
11
import math
12
import re
13
import os
14
15
16
class AbstractGenerator:
    """Render rows of an .xlsx table as abstract pages in a .docx file.

    Usage: create an instance, load the table with ``read_xlsx`` and then
    call ``write_docx``.  Every table row becomes one A4 portrait section.

    Instance attributes (names kept as-is so existing callers still work):

    * ``records`` -- table loaded by ``read_xlsx``; ``None`` until then.
    * ``image_dir`` -- directory that figure file names are joined onto.
    * ``template_type`` -- ``'aini2016'`` selects the AINI layout; any
      other value selects the JSCPB layout.
    * ``exreg4author`` / ``exreg4affiliation`` / ``exreg4super`` /
      ``exreg4italic`` -- compiled patterns used to pick the parenthesised
      markers out of author and affiliation text.
    * ``preferredImageMaxWidth`` / ``preferredImageMaxHeight`` -- largest
      figure box, in centimetres.
    * ``preferredImageDpi`` -- resolution assumed when an image does not
      report a usable one.
    """

    # Matches one parenthesised marker such as "(1)" and captures its content.
    _PAREN_TOKEN = re.compile(r'\((\w+)\)')

    def __init__(self, image_dir='', template_type='aini2016'):
        """Store the settings; no file is touched until read/write is called."""
        self.records = None
        self.image_dir = image_dir
        self.template_type = template_type
        self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
        self.exreg4affiliation = re.compile(r'^((?:\(.+\))*)(.+)$')
        self.exreg4super = re.compile(r'(\(\w+\))')
        self.exreg4italic = re.compile(r'(\<i\>\w+\</i\>)')
        self.preferredImageMaxWidth = 14  # cm
        self.preferredImageMaxHeight = 8.5  # cm
        self.preferredImageDpi = 72

    # ------------------------------------------------------------------
    # small value helpers
    # ------------------------------------------------------------------

    def _empty(self, text):
        """Return True when a table cell carries no usable text.

        ``None``, a float NaN and whitespace-only strings are empty.  Any
        other value (numbers included) is judged by its string form, so a
        numeric cell no longer raises.
        """
        if text is None:
            return True
        if isinstance(text, float):
            return math.isnan(text)
        return str(text).strip() == ''

    def _cellText(self, value, strip=True):
        """Return a cell as text, mapping empty cells to ``''``.

        :param value: raw cell value.
        :param strip: when True, surrounding whitespace is removed.
        """
        if self._empty(value):
            return ''
        text = str(value)
        return text.strip() if strip else text

    def _toArray(self, text, delim):
        """Split a cell on ``delim`` and drop blank pieces.

        Pieces are returned unstripped; an empty cell yields ``[]``.
        """
        if self._empty(text):
            return []
        return [item for item in str(text).split(delim) if item.strip()]

    def _removeParentheses(self, text):
        """Turn ``"(1)(2)"`` into ``"1, 2"``.

        The text is split on parenthesised markers; every non-blank piece
        (marker content or text in between) is stripped and the pieces are
        joined with ``", "``.
        """
        pieces = (piece.strip() for piece in self._PAREN_TOKEN.split(text))
        return ', '.join(piece for piece in pieces if piece)

    def _parseAuthor(self, author):
        """Split one author entry into ``(name, markers)``.

        ``markers`` is the trailing ``(...)`` group rewritten by
        ``_removeParentheses``.  When the entry does not fit the author
        pattern the whole stripped entry is used as the name instead of
        failing on a missing match.
        """
        match = self.exreg4author.match(author)
        if match is None:
            return (author.strip(), '')
        name = match.group(1).strip()
        markers = self._removeParentheses(match.group(2).strip())
        return (name, markers)

    def _parseAffiliation(self, affiliation):
        """Split one affiliation entry into ``(markers, name)``.

        ``markers`` is the leading ``(...)`` group rewritten by
        ``_removeParentheses``.  Entries that do not fit the pattern are
        returned whole as the name.
        """
        match = self.exreg4affiliation.match(affiliation)
        if match is None:
            return ('', affiliation.strip())
        markers = self._removeParentheses(match.group(1).strip())
        name = match.group(2).strip()
        return (markers, name)

    # ------------------------------------------------------------------
    # image helpers
    # ------------------------------------------------------------------

    def _getImageSize(self, pixel, dpi):
        """Convert a pixel count at ``dpi`` dots per inch to centimetres."""
        return pixel / dpi * 2.54

    def _resolveDpi(self, info):
        """Pick a usable ``(x_dpi, y_dpi)`` pair from an image info mapping.

        ``info['dpi']`` is used when present.  ``jfif_density`` is only
        consulted when ``jfif_unit`` is 2 (dots per centimetre) and is then
        converted to dots per inch; with any other unit the density is not
        a per-inch resolution.  Missing, malformed or non-positive values
        fall back to ``preferredImageDpi``, which also rules out a
        division by zero later on.
        """
        fallback = float(self.preferredImageDpi)

        density = info.get('dpi')
        if density is None and info.get('jfif_unit') == 2:
            per_cm = info.get('jfif_density')
            if per_cm is not None:
                density = tuple(value * 2.54 for value in per_cm)

        try:
            raw_x, raw_y = density
        except (TypeError, ValueError):
            # None, a scalar, or a tuple of unexpected length.
            raw_x = raw_y = density

        def usable(value):
            try:
                number = float(value)
            except (TypeError, ValueError, ZeroDivisionError):
                return fallback
            # NaN compares False here and therefore also falls back.
            return number if number > 0 else fallback

        return (usable(raw_x), usable(raw_y))

    def _fitImageSize(self, width, height):
        """Shrink ``(width, height)`` in cm to fit the preferred maximum box.

        The width limit is applied first, then the height limit; each step
        scales the other dimension by the same ratio.
        """
        if width > self.preferredImageMaxWidth:
            height = height * self.preferredImageMaxWidth / width
            width = self.preferredImageMaxWidth
        if height > self.preferredImageMaxHeight:
            width = width * self.preferredImageMaxHeight / height
            height = self.preferredImageMaxHeight
        return (width, height)

    def _getPreferredImageSize(self, fpath):
        """Return the figure size to use for ``fpath``.

        :returns: ``(width, height)`` as ``docx.shared.Cm`` lengths, limited
            to the preferred maximum box.
        """
        # The context manager closes the file even if reading fails.
        with Image.open(fpath) as img:
            pixel_width, pixel_height = img.size
            dpi_x, dpi_y = self._resolveDpi(img.info)

        width = self._getImageSize(pixel_width, dpi_x)
        height = self._getImageSize(pixel_height, dpi_y)
        width, height = self._fitImageSize(width, height)
        return (docx.shared.Cm(width), docx.shared.Cm(height))

    def _insert_image(self, filename, image_filename):
        """Add a picture to every paragraph containing ``[[FIGURE]]``.

        The document at ``filename`` is opened, each paragraph whose text
        contains the marker gets a new run holding a line break, the
        picture (300pt wide) and another line break, and the document is
        saved back to ``filename``.  The marker text itself is left alone.
        """
        doc = docx.Document(filename)

        for paragraph in doc.paragraphs:
            if '[[FIGURE]]' not in paragraph.text:
                continue
            run = paragraph.add_run()
            # A run cannot contain paragraphs (Run has no add_paragraph());
            # a line break is the in-run way to set the picture apart.
            run.add_break()
            run.add_picture(image_filename, width=docx.shared.Pt(300))
            run.add_break()

        doc.save(filename)

    # ------------------------------------------------------------------
    # input / output
    # ------------------------------------------------------------------

    def read_xlsx(self, filename):
        """Load the first sheet of ``filename`` into ``self.records``."""
        print('Reading: %s' % filename)
        # Close the workbook handle as soon as the sheet is parsed.
        with pd.ExcelFile(filename) as exls:
            self.records = exls.parse()

    def write_docx(self, filename, template=None):
        """Write every loaded record to ``filename``.

        :param filename: path of the .docx file to create.
        :param template: optional .docx to start from; ``None`` starts from
            the library's default document.
        :raises RuntimeError: if no table has been loaded yet.
        """
        if self.records is None:
            raise RuntimeError('no records loaded; call read_xlsx() first')

        print('Writing: %s' % filename)

        # docx.Document(None) loads the default template.
        doc = docx.Document(template)

        if self.template_type == 'aini2016':
            write_record = self._write_doc_aini2016
        else:
            write_record = self._write_doc_jscpb2016

        # iterrows() yields exactly one Series per row even when index
        # labels repeat (records.loc[label] would return a frame then).
        for position, (_, record) in enumerate(self.records.iterrows()):
            if position == 0:
                section = doc.sections[0]
            else:
                section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)
            section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
            section.page_height = docx.shared.Mm(297)
            section.page_width = docx.shared.Mm(210)
            section.top_margin = docx.shared.Mm(20)
            section.right_margin = docx.shared.Mm(20)
            section.left_margin = docx.shared.Mm(20)
            section.bottom_margin = docx.shared.Mm(15)
            write_record(doc, record)

        doc.save(filename)

    # ------------------------------------------------------------------
    # layouts
    # ------------------------------------------------------------------

    def _write_doc_jscpb2016(self, doc, record):
        """Append one record in the JSCPB layout.

        Reads the ``title``, ``authors``, ``affiliations``, ``abstract``
        and ``keywords`` columns; empty cells are written as empty text.
        """
        print('"%s"' % record['title'])

        # Title -- the run is created explicitly so it exists even when
        # the cell is empty.
        run = doc.add_paragraph().add_run(
            self._cellText(record['title'], strip=False))
        run.font.size = docx.shared.Pt(12)
        run.bold = True

        # Authors -- splitting on a capturing pattern puts the "(n)"
        # markers at the odd positions; those become superscript.
        p = doc.add_paragraph()
        authors = self._cellText(record['authors'], strip=False)
        for position, piece in enumerate(self.exreg4super.split(authors)):
            run = p.add_run(piece)
            if position % 2:
                run.font.superscript = True

        # Affiliations
        run = doc.add_paragraph().add_run(
            self._cellText(record['affiliations'], strip=False))
        run.font.size = docx.shared.Pt(9)
        run.italic = True

        # Abstract Body
        doc.add_paragraph(self._cellText(record['abstract'], strip=False))

        # keywords
        p = doc.add_paragraph('Keywords: ')
        p.add_run(self._cellText(record['keywords'], strip=False)).italic = True

    def _add_labelled_paragraphs(self, doc, label, items):
        """Write ``items`` one per justified paragraph, first one labelled.

        Each paragraph gets 10pt line spacing and no space after; the
        first starts with ``label`` in bold.

        :returns: the last paragraph written, or ``None`` if ``items`` is
            empty.
        """
        last = None
        for position, item in enumerate(items):
            last = doc.add_paragraph()
            last.paragraph_format.line_spacing = docx.shared.Pt(10)
            last.paragraph_format.space_after = docx.shared.Pt(0)
            last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            if position == 0:
                last.add_run(label).bold = True
            last.add_run(item)
        return last

    def _write_doc_aini2016(self, doc, record):
        """Append one record in the AINI 2016 layout.

        Reads the ``Title``, ``Name``, ``Affiliation``, ``e-mail``,
        ``DOI``, ``Abstract``, ``Figure file Name``, ``Figure comment``,
        ``References``, ``Acknowledgement``, ``Funding`` and
        ``Program No. Long`` columns.  Multi-valued cells hold one entry
        per line.  Empty cells are written as empty text.

        Spacing note: after each part, the trailing space is set on the
        most recently written paragraph.  When a part is empty this lands
        on the paragraph written before it; ``last`` tracks that paragraph.
        """
        print('"%s"' % record['Title'])

        Pt = docx.shared.Pt
        align = docx.enum.text.WD_ALIGN_PARAGRAPH

        title = self._cellText(record['Title'])
        doi = self._cellText(record['DOI'])

        font = doc.styles['Normal'].font
        font.size = Pt(10)
        font.name = 'Times New Roman'

        # Title
        p = doc.add_paragraph()
        p.alignment = align.CENTER
        p.paragraph_format.space_before = Pt(25)
        p.paragraph_format.space_after = Pt(14)
        r = p.add_run(title)
        r.font.size = Pt(12)
        r.bold = True
        r.italic = True

        # Authors
        p = doc.add_paragraph()
        p.alignment = align.CENTER
        p.paragraph_format.line_spacing = Pt(12)
        p.paragraph_format.space_after = Pt(12)
        authors = [self._parseAuthor(entry)
                   for entry in self._toArray(record['Name'], '\n')]
        for position, (name, markers) in enumerate(authors):
            if position:
                p.add_run(', ').bold = True
            # Non-breaking spaces keep a name from wrapping mid-way.
            p.add_run(name.replace(' ', '\u00A0')).bold = True
            if markers != '':
                r = p.add_run('\u00A0' + markers)
                r.bold = True
                r.font.superscript = True
        p.add_run('\n')

        # Affiliation (continues in the authors paragraph)
        affiliations = self._toArray(record['Affiliation'], '\n')
        for position, entry in enumerate(affiliations):
            markers, name = self._parseAffiliation(entry)
            if position:
                p.add_run(', ')
            if markers != '':
                p.add_run(markers + '\u00A0').font.superscript = True
            p.add_run(name)
        p.add_run('\n' + self._cellText(record['e-mail'], strip=False))

        # DOI
        last = doc.add_paragraph('DOI:' + doi)
        last.alignment = align.CENTER
        last.paragraph_format.space_after = Pt(12)

        # Abstract Body
        for position, item in enumerate(self._toArray(record['Abstract'], '\n')):
            last = doc.add_paragraph(item)
            last.alignment = align.JUSTIFY
            last.paragraph_format.line_spacing = Pt(11)
            last.paragraph_format.space_after = Pt(2)
            if position:
                last.paragraph_format.first_line_indent = Pt(12)
        last.paragraph_format.space_after = Pt(12)

        # Figure
        figure_name = record['Figure file Name']
        if not self._empty(figure_name):

            # Figure File Name -- stripped so padded cells still resolve.
            img_fpath = os.path.join(self.image_dir, str(figure_name).strip())
            size = self._getPreferredImageSize(img_fpath)
            # Only the width is passed, so the aspect ratio is kept.
            doc.add_picture(img_fpath, width=size[0])
            last = doc.paragraphs[-1]
            last.alignment = align.CENTER

            # Figure Comment
            written = self._add_labelled_paragraphs(
                doc, 'Figure: ', self._toArray(record['Figure comment'], '\n'))
            if written is not None:
                last = written

        last.paragraph_format.space_after = Pt(14)

        # References
        references = self._toArray(record['References'], '\n')
        if references:
            heading = doc.add_paragraph()
            heading.paragraph_format.line_spacing = Pt(11)
            heading.paragraph_format.space_after = Pt(0)
            heading.add_run('References:').bold = True
        for item in references:
            last = doc.add_paragraph()
            last.alignment = align.JUSTIFY
            last.paragraph_format.line_spacing = Pt(10)
            last.paragraph_format.space_after = Pt(0)
            last.add_run(item)
        last.paragraph_format.space_after = Pt(10)

        # Acknowledgement
        written = self._add_labelled_paragraphs(
            doc, 'Acknowledgement: ',
            self._toArray(record['Acknowledgement'], '\n'))
        if written is not None:
            last = written
        last.paragraph_format.space_after = Pt(10)

        # Funding
        written = self._add_labelled_paragraphs(
            doc, 'Funding: ', self._toArray(record['Funding'], '\n'))
        if written is not None:
            last = written
        last.paragraph_format.space_after = Pt(10)

        # Citation
        p = doc.add_paragraph()
        p.paragraph_format.line_spacing = Pt(10)
        p.alignment = align.JUSTIFY
        p.add_run('Citation: ').bold = True
        author_names = ', '.join(name for name, _ in authors)
        p.add_run(author_names + ' (2016). ' + title.replace('\n', ' ') + '. ')
        p.add_run('Advances in Neuroinformatics IV. ').italic = True
        p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: '
                  + self._cellText(record['Program No. Long'])
                  + '. DOI:' + doi)
317
318
319
if __name__ == '__main__':
320
    img_dir = './image'
321
    input_xlsx = 'input.xlsx'
322
    output_docx = 'output.docx'
323
    template_docx = './template/aini2016.docx'
324