AbstractGenerator._empty() - Code Metrics - Inspection of "add image auto resizing function" - DaisukeMiyamoto/abstract_generator - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#2)

by Yoshihiro

created 2016-12-06 09:29 UTC

AbstractGenerator._empty() A

↳ Parent: AbstractGenerator

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	4
rs	10
c	0
b	0
f	0

# -*- coding: utf-8 -*-
"""
Generate abstract document (docx) file from table (xlsx)
by nebula

Dependency: pandas, xlrd, python-docx, pillow
"""
from PIL import Image
import pandas as pd
import docx
import math
import re
import os


class AbstractGenerator:
    def __init__(self, image_dir='', template_type='aini2016'):
        self.records = None
        self.image_dir = image_dir
        self.template_type = template_type
        self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
        self.exreg4affiliation = re.compile(r'^((?:\(.+\))*)(.+)$')
        self.exreg4super = re.compile(r'(\(\w+\))')
        self.exreg4italic = re.compile(r'(\<i\>\w+\</i\>)')
        self.preferredImageMaxWidth = 14  # cm
        self.preferredImageMaxHeight = 8.5 # cm
        self.preferredImageDpi = 72

    def _insert_image(self, filename, image_filename):
        """Insert a picture into every '[[FIGURE]]' paragraph of a saved document.

        Opens the docx file *filename*, and for each paragraph whose text
        contains the '[[FIGURE]]' marker appends a run holding a line break,
        the picture *image_filename* at a width of 300 pt, and another line
        break. The marker text itself is left in place. The document is then
        saved back to *filename*.
        """
        doc = docx.Document(filename)

        for paragraph in doc.paragraphs:
            if '[[FIGURE]]' not in paragraph.text:
                continue
            run = paragraph.add_run()
            # A python-docx Run has no add_paragraph(); add_break() is the
            # run-level call that puts the picture on a line of its own.
            run.add_break()
            run.add_picture(image_filename, width=docx.shared.Pt(300))
            run.add_break()

        doc.save(filename)

    def _empty(self, text):
        if isinstance(text, float) and math.isnan(text):
            return True
        return text.strip() == ''

    def _toArray(self, text, delim):
        if self._empty(text) == True:
            return []
        items = text.split(delim)
        return [item for item in items if item.strip()]

    def _removeParentheses(self, text):
        exreg = re.compile(r'\((\w+)\)')
        nums = exreg.split(text)
        num = ''
        for n in nums:
             n = n.strip()
             if n == '':
                 continue
             if num != '':
                 num += ', '
             num += n
        return num

    def _getImageSize(self, pixel, dpi):
        return pixel / dpi * 2.54

    def _getPreferredImageSize(self, fpath):
        """Work out the display size of an image, capped to the preferred maximum.

        The physical size is computed from the pixel dimensions and the
        resolution stored in the file. The resolution comes from the 'dpi'
        entry of the image info when present; otherwise from 'jfif_density'
        when 'jfif_unit' says it is in dots per inch (1) or dots per cm (2).
        If neither applies, or a reported resolution is not positive,
        self.preferredImageDpi is used. The size is then scaled down,
        keeping the aspect ratio, so that it does not exceed
        self.preferredImageMaxWidth and self.preferredImageMaxHeight.

        Returns a (width, height) tuple of docx.shared.Cm lengths.
        """
        fallback_dpi = self.preferredImageDpi

        # The with-block closes the file even when reading its header fails.
        with Image.open(fpath) as img:
            px_width, px_height = img.size
            info = img.info
            dpi = info.get('dpi')
            if dpi is None and 'jfif_density' in info:
                # jfif_density is only a resolution when jfif_unit names a
                # physical unit; with unit 0 it is merely an aspect ratio.
                density = info['jfif_density']
                unit = info.get('jfif_unit')
                if unit == 1:
                    dpi = density
                elif unit == 2:
                    dpi = (density[0] * 2.54, density[1] * 2.54)

        if dpi is None:
            dpi = (fallback_dpi, fallback_dpi)
        # A zero (or otherwise non-positive) resolution would divide by zero.
        dpi_x = dpi[0] if dpi[0] > 0 else fallback_dpi
        dpi_y = dpi[1] if dpi[1] > 0 else fallback_dpi

        width = self._getImageSize(px_width, dpi_x)
        height = self._getImageSize(px_height, dpi_y)

        # Shrink to the width limit first, then to the height limit; each
        # step rescales the other dimension to keep the aspect ratio.
        max_width = self.preferredImageMaxWidth
        max_height = self.preferredImageMaxHeight
        if width > max_width:
            height = height * max_width / width
            width = max_width
        if height > max_height:
            width = width * max_height / height
            height = max_height

        return (docx.shared.Cm(width), docx.shared.Cm(height))

    def read_xlsx(self, filename):
        """Load the abstract table from an Excel workbook into self.records.

        Prints a progress line, opens *filename* with pandas and stores the
        result of ExcelFile.parse() (called with its default arguments) in
        self.records.
        """
        print('Reading: %s' % filename)
        # ExcelFile keeps the workbook open; the with-block releases the
        # handle as soon as the sheet has been parsed.
        with pd.ExcelFile(filename) as workbook:
            self.records = workbook.parse()

    def write_docx(self, filename, template=None):
        """Render every loaded record into a docx file.

        Starts from *template* when given, otherwise from a blank document.
        Each record gets its own A4 portrait section (the first record reuses
        the document's initial section, later ones start on a new page) and
        is written with the layout selected by self.template_type. The
        result is saved to *filename*.
        """
        print('Writing: %s' % filename)

        doc = docx.Document() if template is None else docx.Document(template)

        # Anything other than 'aini2016' falls back to the jscpb2016 layout.
        if self.template_type == 'aini2016':
            write_record = self._write_doc_aini2016
        else:
            write_record = self._write_doc_jscpb2016

        for position, label in enumerate(self.records.index):
            if position == 0:
                section = doc.sections[0]
            else:
                section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)

            # A4 portrait with 20 mm top/side margins and a 15 mm bottom margin.
            section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
            section.page_height = docx.shared.Mm(297)
            section.page_width = docx.shared.Mm(210)
            section.top_margin = docx.shared.Mm(20)
            section.right_margin = docx.shared.Mm(20)
            section.left_margin = docx.shared.Mm(20)
            section.bottom_margin = docx.shared.Mm(15)

            write_record(doc, self.records.loc[label])

        doc.save(filename)

    def _write_doc_jscpb2016(self, doc, record):
        """Append one abstract in the jscpb2016 layout to *doc*.

        Reads the 'title', 'authors', 'affiliations', 'abstract' and
        'keywords' fields of *record* and adds one paragraph for each.
        """
        print('"%s"' % record['title'])

        # Title: 12 pt bold.
        title_par = doc.add_paragraph(record.title)
        title_run = title_par.runs[0]
        title_run.font.size = docx.shared.Pt(12)
        title_run.bold = True

        # Authors: the split keeps the "(n)" markers at the odd positions,
        # and those are the pieces rendered as superscript.
        author_par = doc.add_paragraph()
        fragments = self.exreg4super.split(record.authors)
        for position, fragment in enumerate(fragments):
            run = author_par.add_run(fragment)
            if position & 1:
                run.font.superscript = True

        # Affiliations: 9 pt italic.
        affiliation_par = doc.add_paragraph(record.affiliations)
        affiliation_run = affiliation_par.runs[0]
        affiliation_run.font.size = docx.shared.Pt(9)
        affiliation_run.italic = True

        # Abstract body.
        doc.add_paragraph(record.abstract)

        # Keywords: plain label followed by the italic keyword text.
        keyword_par = doc.add_paragraph('Keywords: ')
        keyword_run = keyword_par.add_run(record.keywords)
        keyword_run.italic = True


    def _write_doc_aini2016(self, doc, record):
        """Append one abstract in the AINI 2016 layout to *doc*.

        Reads these fields of *record*: 'Title', 'Name', 'Affiliation',
        'e-mail', 'DOI', 'Abstract', 'Figure file Name', 'Figure comment'
        (only when a figure is given), 'References', 'Acknowledgement',
        'Funding' and 'Program No. Long'. Multi-line fields are split on
        newlines with _toArray, so blank lines are ignored and an empty
        field produces no paragraphs.

        As a side effect the document's 'Normal' style is set to 10 pt
        Times New Roman.
        """
        print('"%s"' % record['Title'])

        Pt = docx.shared.Pt
        ALIGN = docx.enum.text.WD_ALIGN_PARAGRAPH

        font = doc.styles['Normal'].font
        font.size = Pt(10)
        font.name = 'Times New Roman'

        # Program Number
        #p = doc.add_paragraph()
        #p.paragraph_format.line_spacing = docx.shared.Pt(12)
        #p.paragraph_format.space_after = docx.shared.Pt(5)
        #r = p.add_run(record['Program No.'].strip())

        # Title: centred, 12 pt bold italic.
        p = doc.add_paragraph()
        p.alignment = ALIGN.CENTER
        p.paragraph_format.space_before = Pt(25)
        p.paragraph_format.space_after = Pt(14)
        r = p.add_run(record['Title'].strip())
        r.font.size = Pt(12)
        r.bold = True
        r.italic = True

        # Authors: one centred paragraph shared with the affiliations below.
        p = doc.add_paragraph()
        p.alignment = ALIGN.CENTER
        p.paragraph_format.line_spacing = Pt(12)
        p.paragraph_format.space_after = Pt(12)
        authors = self._toArray(record['Name'], '\n')
        for position, author in enumerate(authors):
            m = self.exreg4author.match(author)
            if position:
                p.add_run(', ').bold = True
            # Non-breaking spaces keep each name on one line.
            name = m.group(1).strip().replace(' ', '\u00A0')
            num = self._removeParentheses(m.group(2).strip())
            p.add_run(name).bold = True
            if num != '':
                r = p.add_run('\u00A0' + num)
                r.bold = True
                r.font.superscript = True
        p.add_run('\n')

        # Affiliation: leading "(n)" markers become superscript numbers.
        affiliations = self._toArray(record['Affiliation'], '\n')
        for position, affiliation in enumerate(affiliations):
            m = self.exreg4affiliation.match(affiliation)
            if position:
                p.add_run(', ')
            num = self._removeParentheses(m.group(1).strip())
            name = m.group(2).strip()
            if num != '':
                r = p.add_run(num + '\u00A0')
                r.font.superscript = True
            p.add_run(name)
        p.add_run('\n' + record['e-mail'])

        # DOI
        p = doc.add_paragraph('DOI:' + record['DOI'].strip())
        p.alignment = ALIGN.CENTER
        p.paragraph_format.space_after = Pt(12)

        # Abstract Body: every paragraph after the first is indented.
        items = self._toArray(record['Abstract'], '\n')
        for position, item in enumerate(items):
            p = doc.add_paragraph(item)
            p.alignment = ALIGN.JUSTIFY
            p.paragraph_format.line_spacing = Pt(11)
            p.paragraph_format.space_after = Pt(2)
            if position:
                p.paragraph_format.first_line_indent = Pt(12)
        # Applies to the most recently created paragraph, which is the DOI
        # paragraph when the abstract is empty.
        p.paragraph_format.space_after = Pt(12)

        # Figure
        if not self._empty(record['Figure file Name']):

            # Figure File Name
            img_fpath = os.path.join(self.image_dir, record['Figure file Name'])
            size = self._getPreferredImageSize(img_fpath)
            # Only the width is passed; the height limit is already
            # reflected in the width returned above.
            doc.add_picture(img_fpath, width=size[0]) #, height=size[1])
            p = doc.paragraphs[-1]
            p.alignment = ALIGN.CENTER

            # Figure Comment
            items = self._toArray(record['Figure comment'], '\n')
            for position, item in enumerate(items):
                p = doc.add_paragraph()
                p.paragraph_format.line_spacing = Pt(10)
                p.paragraph_format.space_after = Pt(0)
                p.alignment = ALIGN.JUSTIFY
                if position == 0:
                    p.add_run('Figure: ').bold = True
                p.add_run(item)

        # Spacing after whatever paragraph was written last so far.
        p.paragraph_format.space_after = Pt(14)

        # References: a bold heading paragraph, then one paragraph per entry.
        items = self._toArray(record['References'], '\n')
        for position, item in enumerate(items):
            if position == 0:
                p = doc.add_paragraph()
                p.paragraph_format.line_spacing = Pt(11)
                p.paragraph_format.space_after = Pt(0)
                p.add_run('References:').bold = True
            p = doc.add_paragraph()
            p.alignment = ALIGN.JUSTIFY
            p.paragraph_format.line_spacing = Pt(10)
            p.paragraph_format.space_after = Pt(0)
            p.add_run(item)
        p.paragraph_format.space_after = Pt(10)

        # Acknowledgement
        items = self._toArray(record['Acknowledgement'], '\n')
        for position, item in enumerate(items):
            p = doc.add_paragraph()
            p.paragraph_format.line_spacing = Pt(10)
            p.paragraph_format.space_after = Pt(0)
            p.alignment = ALIGN.JUSTIFY
            if position == 0:
                p.add_run('Acknowledgement: ').bold = True
            p.add_run(item)
        p.paragraph_format.space_after = Pt(10)

        # Funding
        items = self._toArray(record['Funding'], '\n')
        for position, item in enumerate(items):
            p = doc.add_paragraph()
            p.paragraph_format.line_spacing = Pt(10)
            p.paragraph_format.space_after = Pt(0)
            p.alignment = ALIGN.JUSTIFY
            if position == 0:
                p.add_run('Funding: ').bold = True
            p.add_run(item)
        p.paragraph_format.space_after = Pt(10)

        # Citation: author names without their affiliation markers.
        p = doc.add_paragraph()
        p.paragraph_format.line_spacing = Pt(10)
        p.alignment = ALIGN.JUSTIFY
        p.add_run('Citation: ').bold = True
        author_tmp = ', '.join(
            self.exreg4author.match(author).group(1).strip()
            for author in authors
        )
        p.add_run(author_tmp + ' (2016). ' + record['Title'].strip().replace('\n', ' ') + '. ')
        p.add_run('Advances in Neuroinformatics IV. ').italic = True
        p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: ' + record['Program No. Long'].strip() + '. DOI:' + record['DOI'].strip())


if __name__ == '__main__':
    # Default paths used when the module is run as a script.
    # NOTE(review): only these assignments are visible in this listing;
    # presumably they are handed to AbstractGenerator (image directory,
    # input table, output document, template) -- confirm against the full file.
    img_dir = './image'
    input_xlsx = 'input.xlsx'
    output_docx = 'output.docx'
    template_docx = './template/aini2016.docx'


1			# -*- coding: utf-8 -*-
2			"""
3			Generate abstract document (docx) file from table (xlsx)
4			by nebula
5
6			Dependency: pandas, xlrd, python-docx, pillow
7			"""
8			from PIL import Image
9			import pandas as pd
10			import docx
11			import math
12			import re
13			import os
14
15
16			class AbstractGenerator:
17			def __init__(self, image_dir='', template_type='aini2016'):
18			self.records = None
19			self.image_dir = image_dir
20			self.template_type = template_type
21			self.exreg4author = re.compile(r'^([^\)]+)((?:\(.+\))*)$')
22			self.exreg4affiliation = re.compile(r'^((?:\(.+\))*)(.+)$')
23			self.exreg4super = re.compile(r'(\(\w+\))')
24			self.exreg4italic = re.compile(r'(\<i\>\w+\</i\>)')
25			self.preferredImageMaxWidth = 14 # cm
26			self.preferredImageMaxHeight = 8.5 # cm
27			self.preferredImageDpi = 72
28
29			def _insert_image(self, filename, image_filename):
30			doc = docx.Document(filename)
31
32			for paragraph in doc.paragraphs:
33			if '[[FIGURE]]' in paragraph.text:
34			#paragraph.text = ''
35			run = paragraph.add_run()
36			run.add_paragraph()
37			inline_shape = run.add_picture(image_filename, width=docx.shared.Pt(300))
38			run.add_paragraph()
39
40			doc.save(filename)
41
42			def _empty(self, text):
43			if isinstance(text, float) and math.isnan(text):
44			return True
45			return text.strip() == ''
46
47			def _toArray(self, text, delim):
48			if self._empty(text) == True:
49			return []
50			items = text.split(delim)
51			return [item for item in items if item.strip()]
52
53			def _removeParentheses(self, text):
54			exreg = re.compile(r'\((\w+)\)')
55			nums = exreg.split(text)
56			num = ''
57			for n in nums:
58			n = n.strip()
59			if n == '':
60			continue
61			if num != '':
62			num += ', '
63			num += n
64			return num
65
66			def _getImageSize(self, pixel, dpi):
67			return pixel / dpi * 2.54
68
69			def _getPreferredImageSize(self, fpath):
70			img = Image.open(fpath)
71			dpi = (self.preferredImageDpi, self.preferredImageDpi)
72			if 'dpi' in img.info:
73			dpi = img.info['dpi']
74			if 'jfif_density' in img.info:
75			dpi = img.info['jfif_density']
76			width = self._getImageSize(img.size[0], dpi[0])
77			height = self._getImageSize(img.size[1], dpi[1])
78			if width > self.preferredImageMaxWidth:
79			height = height * self.preferredImageMaxWidth / width
80			width = self.preferredImageMaxWidth
81			if height > self.preferredImageMaxHeight:
82			width = width * self.preferredImageMaxHeight / height
83			height = self.preferredImageMaxHeight
84			# print('image: %s(w:%dpx(%gcm),h:%dpx(%gcm),dpi:%s) -> (w:%gcm,h:%gcm)' % (fpath, img.size[0], self._getImageSize(img.size[0], dpi[0]), img.size[1], self._getImageSize(img.size[1], dpi[1]), dpi, width, height))
85			img.close()
86			return (docx.shared.Cm(width), docx.shared.Cm(height))
87
88			def read_xlsx(self, filename):
89			print('Reading: %s' % filename)
90			exls = pd.ExcelFile(filename)
91			self.records = exls.parse()
92
93			def write_docx(self, filename, template=None):
94			print('Writing: %s' % filename)
95
96			if template is not None:
97			doc = docx.Document(template)
98			else:
99			doc = docx.Document()
100
101			first = True
102			for i in self.records.index:
103			if first == True:
104			section = doc.sections[0]
105			else:
106			section = doc.add_section(docx.enum.section.WD_SECTION.NEW_PAGE)
107			section.orientation = docx.enum.section.WD_ORIENT.PORTRAIT
108			section.page_height = docx.shared.Mm(297)
109			section.page_width = docx.shared.Mm(210)
110			section.top_margin = docx.shared.Mm(20)
111			section.right_margin = docx.shared.Mm(20)
112			section.left_margin = docx.shared.Mm(20)
113			section.bottom_margin = docx.shared.Mm(15)
114			if self.template_type == 'aini2016':
115			self._write_doc_aini2016(doc, self.records.loc[i])
116			else:
117			self._write_doc_jscpb2016(doc, self.records.loc[i])
118			first = False
119
120			doc.save(filename)
121
122			def _write_doc_jscpb2016(self, doc, record):
123			print('"%s"' % record['title'])
124
125			# Title
126			p = doc.add_paragraph(record.title)
127			p.runs[0].font.size = docx.shared.Pt(12)
128			p.runs[0].bold = True
129
130			# Authors
131			p = doc.add_paragraph()
132			author_list = self.exreg4super.split(record.authors)
133			for j in range(len(author_list)):
134			if j & 1:
135			p.add_run(author_list[j]).font.superscript = True
136			else:
137			p.add_run(author_list[j])
138
139			# Affiliations
140			p = doc.add_paragraph(record.affiliations)
141			p.runs[0].font.size = docx.shared.Pt(9)
142			p.runs[0].italic = True
143
144			# Abstract Body
145			p = doc.add_paragraph(record.abstract)
146
147			# keywords
148			p = doc.add_paragraph('Keywords: ')
149			p.add_run(record.keywords).italic = True
150
151
152			def _write_doc_aini2016(self, doc, record):
153			print('"%s"' % record['Title'])
154			exreg4num = re.compile(r'\((\w+)\)')
155
156			font = doc.styles['Normal'].font
157			font.size = docx.shared.Pt(10)
158			font.name = 'Times New Roman'
159
160			# Program Number
161			#p = doc.add_paragraph()
162			#p.paragraph_format.line_spacing = docx.shared.Pt(12)
163			#p.paragraph_format.space_after = docx.shared.Pt(5)
164			#r = p.add_run(record['Program No.'].strip())
165
166			# Title
167			p = doc.add_paragraph()
168			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
169			p.paragraph_format.space_before = docx.shared.Pt(25)
170			p.paragraph_format.space_after = docx.shared.Pt(14)
171			r = p.add_run(record['Title'].strip())
172			r.font.size = docx.shared.Pt(12)
173			r.bold = True
174			r.italic = True
175
176			# Authors
177			p = doc.add_paragraph()
178			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
179			p.paragraph_format.line_spacing = docx.shared.Pt(12)
180			p.paragraph_format.space_after = docx.shared.Pt(12)
181			authors = self._toArray(record['Name'], '\n')
182			first = True
183			for author in authors:
184			m = self.exreg4author.match(author)
185			if first == False:
186			p.add_run(', ').bold = True
187			name = m.group(1).strip().replace(' ', '\u00A0')
188			num = self._removeParentheses(m.group(2).strip())
189			p.add_run(name).bold = True
190			if num != '':
191			r = p.add_run('\u00A0' + num)
192			r.bold = True
193			r.font.superscript = True
194			first = False
195			p.add_run('\n')
196
197			# Affiliation
198			affiliations = self._toArray(record['Affiliation'], '\n')
199			first = True
200			for affiliation in affiliations:
201			m = self.exreg4affiliation.match(affiliation)
202			if first == False:
203			p.add_run(', ')
204			num = self._removeParentheses(m.group(1).strip())
205			name = m.group(2).strip()
206			if num != '':
207			r = p.add_run(num + '\u00A0')
208			r.font.superscript = True
209			p.add_run(name)
210			first = False
211			p.add_run('\n' + record['e-mail'])
212
213			# DOI
214			p = doc.add_paragraph('DOI:' + record['DOI'].strip())
215			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
216			p.paragraph_format.space_after = docx.shared.Pt(12)
217
218			# Abstract Body
219			items = self._toArray(record['Abstract'], '\n')
220			first = True
221			for item in items:
222			p = doc.add_paragraph(item)
223			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
224			p.paragraph_format.line_spacing = docx.shared.Pt(11)
225			p.paragraph_format.space_after = docx.shared.Pt(2)
226			if first == False:
227			p.paragraph_format.first_line_indent = docx.shared.Pt(12)
228			first = False
229			p.paragraph_format.space_after = docx.shared.Pt(12)
230
231			# Figure
232			if self._empty(record['Figure file Name']) == False:
233
234			# Figure File Name
235			img_fpath = os.path.join(self.image_dir, record['Figure file Name'])
236			size = self._getPreferredImageSize(img_fpath)
237			doc.add_picture(img_fpath, width=size[0]) #, height=size[1])
238			p = doc.paragraphs[-1]
239			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
240
241			# Figure Comment
242			items = self._toArray(record['Figure comment'], '\n')
243			first = True
244			for item in items:
245			p = doc.add_paragraph()
246			p.paragraph_format.line_spacing = docx.shared.Pt(10)
247			p.paragraph_format.space_after = docx.shared.Pt(0)
248			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
249			if first:
250			p.add_run('Figure: ').bold = True
251			first = False
252			p.add_run(item)
253
254			p.paragraph_format.space_after = docx.shared.Pt(14)
255
256			# References
257			items = self._toArray(record['References'], '\n')
258			first = True
259			for item in items:
260			if first:
261			p = doc.add_paragraph()
262			p.paragraph_format.line_spacing = docx.shared.Pt(11)
263			p.paragraph_format.space_after = docx.shared.Pt(0)
264			p.add_run('References:').bold = True
265			first = False
266			p = doc.add_paragraph()
267			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
268			p.paragraph_format.line_spacing = docx.shared.Pt(10)
269			p.paragraph_format.space_after = docx.shared.Pt(0)
270			p.add_run(item)
271			p.paragraph_format.space_after = docx.shared.Pt(10)
272
273			# Acknowledgement
274			items = self._toArray(record['Acknowledgement'], '\n')
275			first = True
276			for item in items:
277			p = doc.add_paragraph()
278			p.paragraph_format.line_spacing = docx.shared.Pt(10)
279			p.paragraph_format.space_after = docx.shared.Pt(0)
280			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
281			if first:
282			p.add_run('Ackknowledgement: ').bold = True
283			first = False
284			p.add_run(item)
285			p.paragraph_format.space_after = docx.shared.Pt(10)
286
287			# Funding
288			items = self._toArray(record['Funding'], '\n')
289			first = True
290			for item in items:
291			p = doc.add_paragraph()
292			p.paragraph_format.line_spacing = docx.shared.Pt(10)
293			p.paragraph_format.space_after = docx.shared.Pt(0)
294			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
295			if first:
296			p.add_run('Funding: ').bold = True
297			first = False
298			p.add_run(item)
299			p.paragraph_format.space_after = docx.shared.Pt(10)
300
301			# Citation
302			p = doc.add_paragraph()
303			p.paragraph_format.line_spacing = docx.shared.Pt(10)
304			p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.JUSTIFY
305			p.add_run('Citation: ').bold = True
306			author_tmp = ''
307			first = True
308			for author in authors:
309			m = self.exreg4author.match(author)
310			if first == False:
311			author_tmp += ', '
312			author_tmp += m.group(1).strip()
313			first = False
314			p.add_run(author_tmp + ' (2016). ' + record['Title'].strip().replace('\n', ' ') + '. ')
315			p.add_run('Advances in Neuroinformatics IV. ').italic = True
316			p.add_run('AINI 2016 and INCF Nodes Workshop Abstract: ' + record['Program No. Long'].strip() + '. DOI:' + record['DOI'].strip())
317
318
319			if __name__ == '__main__':
320			img_dir = './image'
321			input_xlsx = 'input.xlsx'
322			output_docx = 'output.docx'
323			template_docx = './template/aini2016.docx'
324

DaisukeMiyamoto / abstract_generator

Pull Request — master (#2)

AbstractGenerator._empty() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like