AbstractGenerator.write_docx() - Code Metrics - Inspection of "Merge pull request #1 from orrisroot/master" - DaisukeMiyamoto/abstract_generator - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 8c3d23...8de117 )

by Daisuke

created 2016-11-21 14:42 UTC

AbstractGenerator.write_docx() B

↳ Parent: AbstractGenerator

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	5
c	0
b	0
f	0
dl	0
loc	28
rs	8.0894

# -*- coding: utf-8 -*-
"""
Generate abstract document (docx) file from table (xlsx)
by nebula

Dependency: pandas, xlrd, python-docx
"""
import pandas as pd
import docx
import math
import re
import os


class AbstractGenerator:
    """Build a docx abstract booklet from the rows of an xlsx table.

    Typical use: create an instance, load the spreadsheet with
    ``read_xlsx`` and render every row with ``write_docx``.

    Attributes:
        records: table loaded by ``read_xlsx`` (``None`` until then).
        image_dir: directory that figure file names are resolved against.
        template_type: ``'aini2016'`` selects the AINI 2016 layout; any
            other value selects the JSCPB 2016 layout.
    """

    # One parenthesised marker such as "(1)" or "(a)"; the capture group
    # holds the marker text without the parentheses.
    _MARKER = re.compile(r'\((\w+)\)')

    def __init__(self, image_dir='', template_type='aini2016'):
        self.records = None
        self.image_dir = image_dir
        self.template_type = template_type
        # "Name(1)(2)" -> group 1: name, group 2: trailing markers.
        self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
        # "(1)(2)Institute" -> group 1: leading markers, group 2: the rest.
        # Every marker is matched as "(...)" without nested parentheses so
        # that a parenthesised part of the institute name further right is
        # not swallowed into the marker group.
        self.exreg4affiliation = re.compile(r'^((?:\s*\([^()]+\))*)(.+)$')
        # Splits text around "(x)" markers, keeping the markers.
        self.exreg4super = re.compile(r'(\(\w+\))')
        # Matches "<i>word</i>"; not used by the methods of this class.
        self.exreg4italic = re.compile(r'(\<i\>\w+\</i\>)')

    @staticmethod
    def _is_missing(value):
        """Return True for None, NaN and blank strings (empty table cells)."""
        if value is None:
            return True
        if isinstance(value, float) and math.isnan(value):
            return True
        return hasattr(value, 'strip') and not value.strip()

    def _insert_image(self, filename, image_filename):
        """Insert an image after every ``[[FIGURE]]`` placeholder of a docx.

        The file is modified in place: it is loaded, every paragraph whose
        text contains ``[[FIGURE]]`` gets the picture appended (the
        placeholder text itself is left untouched) and the document is
        saved back under the same name.

        Args:
            filename: path of the docx file to update.
            image_filename: path of the picture to insert.
        """
        doc = docx.Document(filename)

        for paragraph in doc.paragraphs:
            if '[[FIGURE]]' not in paragraph.text:
                continue
            run = paragraph.add_run()
            # A run cannot contain paragraphs (``Run`` has no
            # ``add_paragraph``), so line breaks are used to set the
            # picture apart from the surrounding text.
            run.add_break()
            run.add_picture(image_filename, width=docx.shared.Pt(300))
            run.add_break()

        doc.save(filename)

    def _toArray(self, text, delim):
        """Split a table cell into its non-blank parts.

        Args:
            text: cell value; None, NaN and blank strings count as empty,
                any other non-string value is split in its string form.
            delim: separator passed to ``split``.

        Returns:
            List of the parts that contain something other than
            whitespace.  The parts themselves are not stripped.
        """
        if self._is_missing(text):
            return []
        if not hasattr(text, 'split'):
            text = str(text)
        return [item for item in text.split(delim) if item.strip()]

    def _removeParentheses(self, text):
        """Turn ``"(1)(2)"`` into ``"1, 2"``.

        The text is split around ``(word)`` markers; every non-blank piece
        (marker contents as well as any text between markers) is stripped
        and the pieces are joined with ``', '``.
        """
        pieces = (piece.strip() for piece in self._MARKER.split(text))
        return ', '.join(piece for piece in pieces if piece)

    def _split_author(self, author):
        """Return ``(name, markers)`` for one author entry.

        ``markers`` is the comma separated content of the trailing
        ``(x)`` groups, or ``''`` when there are none.  An entry the author
        pattern cannot parse is returned whole as the name instead of
        raising.
        """
        author = author.strip()
        match = self.exreg4author.match(author)
        if match is None:
            return author, ''
        markers = self._removeParentheses(match.group(2).strip())
        return match.group(1).strip(), markers

    def _split_affiliation(self, affiliation):
        """Return ``(markers, name)`` for one affiliation entry."""
        match = self.exreg4affiliation.match(affiliation)
        if match is None:
            return '', affiliation.strip()
        markers = self._removeParentheses(match.group(1).strip())
        return markers, match.group(2).strip()

    def read_xlsx(self, filename):
        """Load the first sheet of *filename* into ``self.records``."""
        print('Reading: %s' % filename)
        # read_excel opens and closes the workbook itself, unlike a bare
        # ExcelFile object that would stay open.
        self.records = pd.read_excel(filename)

    @staticmethod
    def _apply_page_layout(section):
        """Set *section* to portrait A4 (210 x 297 mm) with fixed margins."""
        mm = docx.shared.Mm
        section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
        section.page_height = mm(297)
        section.page_width = mm(210)
        section.top_margin = mm(20)
        section.right_margin = mm(20)
        section.left_margin = mm(20)
        section.bottom_margin = mm(15)

    def write_docx(self, filename, template=None):
        """Render every loaded record into one docx file.

        The first record is written into the first section of the
        document; every further record starts a new section on a new page.

        Args:
            filename: path of the docx file to write.
            template: optional docx file used as the starting document.

        Raises:
            RuntimeError: if no table has been loaded with ``read_xlsx``.
        """
        if self.records is None:
            raise RuntimeError('no records loaded; call read_xlsx() first')
        print('Writing: %s' % filename)

        if template is not None:
            doc = docx.Document(template)
        else:
            doc = docx.Document()

        if self.template_type == 'aini2016':
            write_record = self._write_doc_aini2016
        else:
            write_record = self._write_doc_jscpb2016

        for position, label in enumerate(self.records.index):
            if position == 0:
                section = doc.sections[0]
            else:
                section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)
            self._apply_page_layout(section)
            write_record(doc, self.records.loc[label])

        doc.save(filename)

    def _write_doc_jscpb2016(self, doc, record):
        """Append one record in the JSCPB 2016 layout.

        Reads the columns ``title``, ``authors``, ``affiliations``,
        ``abstract`` and ``keywords`` of *record*.
        """
        print('"%s"' % record['title'])

        # Title
        title_run = doc.add_paragraph().add_run(record['title'])
        title_run.font.size = docx.shared.Pt(12)
        title_run.bold = True

        # Authors: the split keeps the "(x)" markers at the odd positions,
        # and those are rendered as superscript (parentheses included).
        p = doc.add_paragraph()
        fragments = self.exreg4super.split(record['authors'])
        for position, fragment in enumerate(fragments):
            run = p.add_run(fragment)
            if position % 2:
                run.font.superscript = True

        # Affiliations
        affiliation_run = doc.add_paragraph().add_run(record['affiliations'])
        affiliation_run.font.size = docx.shared.Pt(9)
        affiliation_run.italic = True

        # Abstract Body
        doc.add_paragraph(record['abstract'])

        # keywords
        p = doc.add_paragraph('Keywords: ')
        p.add_run(record['keywords']).italic = True

    def _add_labelled_block(self, doc, items, label, previous, space_after):
        """Append *items* as justified paragraphs, the first one labelled.

        Args:
            doc: document being written.
            items: paragraph texts; may be empty.
            label: bold prefix of the first paragraph.
            previous: paragraph written last before this block.
            space_after: spacing in points applied after the block.

        Returns:
            The last paragraph of the block, or *previous* when *items* is
            empty.
        """
        pt = docx.shared.Pt
        last = previous
        for position, text in enumerate(items):
            last = doc.add_paragraph()
            last.paragraph_format.line_spacing = pt(10)
            last.paragraph_format.space_after = pt(0)
            last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            if position == 0:
                last.add_run(label).bold = True
            last.add_run(text)
        # With no items this adjusts the spacing of *previous*; that is how
        # the layout has always behaved, so it is kept.
        last.paragraph_format.space_after = pt(space_after)
        return last

    def _add_references(self, doc, items, previous):
        """Append a 'References:' heading followed by one paragraph per item.

        Nothing is added when *items* is empty.  Returns the last paragraph
        written, or *previous* when *items* is empty.
        """
        pt = docx.shared.Pt
        last = previous
        for position, text in enumerate(items):
            if position == 0:
                heading = doc.add_paragraph()
                heading.paragraph_format.line_spacing = pt(11)
                heading.paragraph_format.space_after = pt(0)
                heading.add_run('References:').bold = True
            last = doc.add_paragraph()
            last.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
            last.paragraph_format.line_spacing = pt(10)
            last.paragraph_format.space_after = pt(0)
            last.add_run(text)
        # Same spacing rule as the labelled blocks, applied to *previous*
        # when there are no references.
        last.paragraph_format.space_after = pt(10)
        return last

    def _write_doc_aini2016(self, doc, record):
        """Append one record in the AINI 2016 layout.

        Reads the columns ``Title``, ``Name``, ``Affiliation``, ``e-mail``,
        ``DOI``, ``Abstract``, ``Figure file Name``, ``Figure comment``,
        ``References``, ``Acknowledgement``, ``Funding`` and
        ``Program No. Long`` of *record*.  Multi-line cells are split on
        newlines; the figure and the labelled sections are skipped when
        their cell is empty.
        """
        print('"%s"' % record['Title'])
        pt = docx.shared.Pt
        align = docx.enum.text.WD_ALIGN_PARAGRAPH

        # This changes the document-wide 'Normal' style, not only the
        # paragraphs of this record.
        normal_font = doc.styles['Normal'].font
        normal_font.size = pt(10)
        normal_font.name = 'Lucida Grande'

        # Program Number (disabled)
        #p = doc.add_paragraph()
        #p.paragraph_format.line_spacing = docx.shared.Pt(12)
        #p.paragraph_format.space_after = docx.shared.Pt(5)
        #r = p.add_run(record['Program No.'].strip())

        # Title
        title = record['Title'].strip()
        p = doc.add_paragraph()
        p.alignment = align.CENTER
        p.paragraph_format.space_before = pt(25)
        p.paragraph_format.space_after = pt(14)
        r = p.add_run(title)
        r.font.size = pt(12)
        r.bold = True
        r.italic = True

        # Authors
        p = doc.add_paragraph()
        p.alignment = align.CENTER
        p.paragraph_format.line_spacing = pt(12)
        p.paragraph_format.space_after = pt(12)
        authors = self._toArray(record['Name'], '\n')
        for position, author in enumerate(authors):
            if position:
                p.add_run(', ').bold = True
            name, markers = self._split_author(author)
            # Non-breaking spaces keep a name from wrapping across lines.
            p.add_run(name.replace(' ', '\u00A0')).bold = True
            if markers:
                r = p.add_run(markers)
                r.bold = True
                r.font.superscript = True
        p.add_run('\n')

        # Affiliation (continues in the author paragraph)
        affiliations = self._toArray(record['Affiliation'], '\n')
        for position, affiliation in enumerate(affiliations):
            if position:
                p.add_run(', ')
            markers, name = self._split_affiliation(affiliation)
            if markers:
                p.add_run(markers + ' ').font.superscript = True
            p.add_run(name)
        p.add_run('\n' + record['e-mail'])

        # DOI
        doi = record['DOI'].strip()
        p = doc.add_paragraph('DOI:' + doi)
        p.alignment = align.CENTER
        p.paragraph_format.space_after = pt(12)

        # Abstract Body
        for text in self._toArray(record['Abstract'], '\n'):
            p = doc.add_paragraph(text)
            p.alignment = align.JUSTIFY
            p.paragraph_format.line_spacing = pt(10)
        p.paragraph_format.space_after = pt(12)

        # Figure (skipped when the cell is empty)
        figure_name = record['Figure file Name']
        if not self._is_missing(figure_name):
            doc.add_picture(os.path.join(self.image_dir, figure_name))
            p = doc.paragraphs[-1]
            p.alignment = align.CENTER

        # Figure Comment
        items = self._toArray(record['Figure comment'], '\n')
        p = self._add_labelled_block(doc, items, 'Figure: ', p, 14)

        # References
        items = self._toArray(record['References'], '\n')
        p = self._add_references(doc, items, p)

        # Acknowledgement
        items = self._toArray(record['Acknowledgement'], '\n')
        p = self._add_labelled_block(doc, items, 'Acknowledgement: ', p, 10)

        # Funding
        items = self._toArray(record['Funding'], '\n')
        p = self._add_labelled_block(doc, items, 'Funding: ', p, 10)

        # Citation
        p = doc.add_paragraph()
        p.paragraph_format.line_spacing = pt(10)
        p.alignment = align.JUSTIFY
        p.add_run('Citation: ').bold = True
        author_names = ', '.join(self._split_author(a)[0] for a in authors)
        p.add_run(author_names + ' (2016). ' + title + '. ')
        p.add_run('Advances in Neuroinformatics IV. ').italic = True
        p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: '
                  + record['Program No. Long'].strip() + '. DOI:' + doi)


if __name__ == '__main__':
    # Default paths for running this module as a script.
    # NOTE(review): the visible listing ends after these assignments;
    # presumably they are handed to AbstractGenerator (image directory,
    # read_xlsx input, write_docx output and template) -- confirm against
    # the complete file.
    img_dir = './image'
    input_xlsx = 'input.xlsx'
    output_docx = 'output.docx'
    template_docx = './template/aini2016.docx'


1			# -- coding: utf-8 --
2			"""
3			Generate abstract document (docx) file from table (xlsx)
4			by nebula
5
6			Dependency: pandas, xlrd, python-docx
7			"""
8			import pandas as pd
9			import docx
10			import math
11			import re
12			import os
13
14
15			class AbstractGenerator:
16			def __init__(self, image_dir='', template_type='aini2016'):
17			self.records = None
18			self.image_dir = image_dir
19			self.template_type = template_type
20			self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
21			self.exreg4affiliation = re.compile(r'^((?:\(.+\))*)(.+)$')
22			self.exreg4super = re.compile(r'(\(\w+\))')
23			self.exreg4italic = re.compile(r'(\<i\>\w+\</i\>)')
24
25			def _insert_image(self, filename, image_filename):
26			doc = docx.Document(filename)
27
28			for paragraph in doc.paragraphs:
29			if '[[FIGURE]]' in paragraph.text:
30			#paragraph.text = ''
31			run = paragraph.add_run()
32			run.add_paragraph()
33			inline_shape = run.add_picture(image_filename, width=docx.shared.Pt(300))
34			run.add_paragraph()
35
36			doc.save(filename)
37
38			def _toArray(self, text, delim):
39			if isinstance(text, float) and math.isnan(text):
40			return []
41			items = text.split(delim)
42			return [item for item in items if item.strip()]
43
44			def _removeParentheses(self, text):
45			exreg = re.compile(r'\((\w+)\)')
46			nums = exreg.split(text)
47			num = ''
48			for n in nums:
49			n = n.strip()
50			if n == '':
51			continue
52			if num != '':
53			num += ', '
54			num += n
55			return num
56
57			def read_xlsx(self, filename):
58			print('Reading: %s' % filename)
59			exls = pd.ExcelFile(filename)
60			self.records = exls.parse()
61
62			def write_docx(self, filename, template=None):
63			print('Writing: %s' % filename)
64
65			if template is not None:
66			doc = docx.Document(template)
67			else:
68			doc = docx.Document()
69
70			first = True
71			for i in self.records.index:
72			if first == True:
73			section = doc.sections[0]
74			else:
75			section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)
76			section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
77			section.page_height = docx.shared.Mm(297)
78			section.page_width = docx.shared.Mm(210)
79			section.top_margin = docx.shared.Mm(20)
80			section.right_margin = docx.shared.Mm(20)
81			section.left_margin = docx.shared.Mm(20)
82			section.bottom_margin = docx.shared.Mm(15)
83			if self.template_type == 'aini2016':
84			self._write_doc_aini2016(doc, self.records.loc[i])
85			else:
86			self._write_doc_jscpb2016(doc, self.records.loc[i])
87			first = False
88
89			doc.save(filename)
90
91			def _write_doc_jscpb2016(self, doc, record):
92			print('"%s"' % record['title'])
93
94			# Title
95			p = doc.add_paragraph(record.title)
96			p.runs[0].font.size = docx.shared.Pt(12)
97			p.runs[0].bold = True
98
99			# Authors
100			p = doc.add_paragraph()
101			author_list = self.exreg4super.split(record.authors)
102			for j in range(len(author_list)):
103			if j & 1:
104			p.add_run(author_list[j]).font.superscript = True
105			else:
106			p.add_run(author_list[j])
107
108			# Affiliations
109			p = doc.add_paragraph(record.affiliations)
110			p.runs[0].font.size = docx.shared.Pt(9)
111			p.runs[0].italic = True
112
113			# Abstract Body
114			p = doc.add_paragraph(record.abstract)
115
116			# keywords
117			p = doc.add_paragraph('Keywords: ')
118			p.add_run(record.keywords).italic = True
119
120
121			def _write_doc_aini2016(self, doc, record):
122			print('"%s"' % record['Title'])
123			exreg4num = re.compile(r'\((\w+)\)')
124
125			font = doc.styles['Normal'].font
126			font.size = docx.shared.Pt(10)
127			font.name = 'Lucida Grande'
128
129			# Program Number
130			#p = doc.add_paragraph()
131			#p.paragraph_format.line_spacing = docx.shared.Pt(12)
132			#p.paragraph_format.space_after = docx.shared.Pt(5)
133			#r = p.add_run(record['Program No.'].strip())
134
135			# Title
136			p = doc.add_paragraph()
137			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
138			p.paragraph_format.space_before = docx.shared.Pt(25)
139			p.paragraph_format.space_after = docx.shared.Pt(14)
140			r = p.add_run(record['Title'].strip())
141			r.font.size = docx.shared.Pt(12)
142			r.bold = True
143			r.italic = True
144
145			# Authors
146			p = doc.add_paragraph()
147			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
148			p.paragraph_format.line_spacing = docx.shared.Pt(12)
149			p.paragraph_format.space_after = docx.shared.Pt(12)
150			authors = self._toArray(record['Name'], '\n')
151			first = True
152			for author in authors:
153			m = self.exreg4author.match(author)
154			if first == False:
155			p.add_run(', ').bold = True
156			name = m.group(1).strip().replace(' ', '\u00A0')
157			num = self._removeParentheses(m.group(2).strip())
158			p.add_run(name).bold = True
159			if num != '':
160			r = p.add_run(num)
161			r.bold = True
162			r.font.superscript = True
163			first = False
164			p.add_run('\n')
165
166			# Affiliation
167			affiliations = self._toArray(record['Affiliation'], '\n')
168			first = True
169			for affiliation in affiliations:
170			m = self.exreg4affiliation.match(affiliation)
171			if first == False:
172			p.add_run(', ')
173			num = self._removeParentheses(m.group(1).strip())
174			name = m.group(2).strip()
175			if num != '':
176			r = p.add_run(num + ' ')
177			r.font.superscript = True
178			p.add_run(name)
179			first = False
180			p.add_run('\n' + record['e-mail'])
181
182			# DOI
183			p = doc.add_paragraph('DOI:' + record['DOI'].strip())
184			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
185			p.paragraph_format.space_after = docx.shared.Pt(12)
186
187			# Abstract Body
188			items = self._toArray(record['Abstract'], '\n')
189			for item in items:
190			p = doc.add_paragraph(item)
191			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
192			p.paragraph_format.line_spacing = docx.shared.Pt(10)
193
194			p.paragraph_format.space_after = docx.shared.Pt(12)
195
196			# Figure
197			doc.add_picture(os.path.join(self.image_dir, record['Figure file Name']))
198			p = doc.paragraphs[-1]
199			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
200
201			# Figure Comment
202			items = self._toArray(record['Figure comment'], '\n')
203			first = True
204			for item in items:
205			p = doc.add_paragraph()
206			p.paragraph_format.line_spacing = docx.shared.Pt(10)
207			p.paragraph_format.space_after = docx.shared.Pt(0)
208			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
209			if first:
210			p.add_run('Figure: ').bold = True
211			first = False
212			p.add_run(item)
213			p.paragraph_format.space_after = docx.shared.Pt(14)
214
215			# References
216			items = self._toArray(record['References'], '\n')
217			first = True
218			for item in items:
219			if first:
220			p = doc.add_paragraph()
221			p.paragraph_format.line_spacing = docx.shared.Pt(11)
222			p.paragraph_format.space_after = docx.shared.Pt(0)
223			p.add_run('References:').bold = True
224			first = False
225			p = doc.add_paragraph()
226			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
227			p.paragraph_format.line_spacing = docx.shared.Pt(10)
228			p.paragraph_format.space_after = docx.shared.Pt(0)
229			p.add_run(item)
230			p.paragraph_format.space_after = docx.shared.Pt(10)
231
232			# Acknowledgement
233			items = self._toArray(record['Acknowledgement'], '\n')
234			first = True
235			for item in items:
236			p = doc.add_paragraph()
237			p.paragraph_format.line_spacing = docx.shared.Pt(10)
238			p.paragraph_format.space_after = docx.shared.Pt(0)
239			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
240			if first:
241			p.add_run('Ackknowledgement: ').bold = True
242			first = False
243			p.add_run(item)
244			p.paragraph_format.space_after = docx.shared.Pt(10)
245
246			# Funding
247			items = self._toArray(record['Funding'], '\n')
248			first = True
249			for item in items:
250			p = doc.add_paragraph()
251			p.paragraph_format.line_spacing = docx.shared.Pt(10)
252			p.paragraph_format.space_after = docx.shared.Pt(0)
253			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
254			if first:
255			p.add_run('Funding: ').bold = True
256			first = False
257			p.add_run(item)
258			p.paragraph_format.space_after = docx.shared.Pt(10)
259
260			# Citation
261			p = doc.add_paragraph()
262			p.paragraph_format.line_spacing = docx.shared.Pt(10)
263			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
264			p.add_run('Citation: ').bold = True
265			author_tmp = ''
266			first = True
267			for author in authors:
268			m = self.exreg4author.match(author)
269			if first == False:
270			author_tmp += ', '
271			author_tmp += m.group(1).strip()
272			first = False
273			p.add_run(author_tmp + ' (2016). ' + record['Title'] + '. ')
274			p.add_run('Advances in Neuroinformatics IV. ').italic = True
275			p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: ' + record['Program No. Long'].strip() + '. DOI:' + record['DOI'].strip())
276
277
278			if __name__ == '__main__':
279			img_dir = './image'
280			input_xlsx = 'input.xlsx'
281			output_docx = 'output.docx'
282			template_docx = './template/aini2016.docx'
283

DaisukeMiyamoto / abstract_generator

Push — master ( 8c3d23...8de117 )

AbstractGenerator.write_docx() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like