|
1
|
|
|
# -*- coding: utf-8 -*- |
|
2
|
1 |
|
"""Convert file format. |
|
3
|
|
|
""" |
|
4
|
1 |
|
import os |
|
5
|
1 |
|
import re |
|
6
|
|
|
|
|
7
|
1 |
|
import PIL.Image |
|
8
|
1 |
|
import PyPDF2 |
|
9
|
|
|
|
|
10
|
1 |
|
from .core import Common |
|
11
|
1 |
|
from .exceptions import InvalidImageFileFormatError |
|
12
|
1 |
|
from .utils.logging import get_logger |
|
13
|
|
|
|
|
14
|
1 |
|
# Module-wide logger; every message emitted by this module goes through the
# "image2pdf" logger obtained from the project's logging helper.
logger = get_logger("image2pdf")
|
15
|
|
|
|
|
16
|
|
|
|
|
17
|
1 |
|
class Image2PDF(Common):
    """Build a single PDF file (e.g. for e-books) from image files.

    Every image in the target directory that passes the skip check is
    converted to a one-page PDF, appended to an in-memory PyPDF2 writer and
    flushed to the output file.

    The helpers ``_convert_extension_with_dot``, ``_make_file_list``,
    ``_check_serial_number``, ``_check_skip_file``, ``_remove_file`` and
    ``_remove_file_bulk`` are not defined here; they are expected to be
    inherited from ``Common``.

    NOTE(review): ``PdfFileWriter`` / ``PdfFileReader`` / ``addPage`` /
    ``getPage`` are the legacy PyPDF2 names — confirm the pinned PyPDF2
    version still provides them.
    """

    def __init__(self, digits, extension, directory_path=None):
        """Constructor.

        Note that the process working directory is changed to the target
        directory as a side effect.

        Args:
            digits (str): Regex target digit.
            extension (str): Target file extension.
            directory_path (str): Target directory path. Defaults to the
                current working directory when ``None``.
        """
        super().__init__()
        self.__digits = digits
        self.__extension = self._convert_extension_with_dot(extension)
        # Escape the extension so that its "." is matched literally instead
        # of acting as the regex "any character" wildcard.
        self.__regex_ext = re.compile(re.escape(self.__extension))
        self.__file_writer = PyPDF2.PdfFileWriter()
        if directory_path is None:
            directory_path = os.getcwd()
        self.__directory_path = directory_path
        logger.debug("Current Directory: {cwd}".format(cwd=self.__directory_path))
        os.chdir(self.__directory_path)

    def make_pdf(self, filename, remove_flag=False):
        """Make a pdf file out of the image files in the target directory.

        Make a pdf file which you can use as an e-book by taking in image
        files such as jpg, png and gif.

        Args:
            filename (str): pdf file name
            remove_flag (bool): If true, the original image files are handed
                to ``_remove_file_bulk`` once the pdf has been written.

        Returns:
            bool: ``True`` if at least one page was written, ``False`` if no
            target file was found.

        Raises:
            InvalidImageFileFormatError: If the configured extension is not
                a supported image extension.
        """
        self._check_image_extension(self.__extension)

        files = self._make_file_list(self.__directory_path, sort=True)
        logger.debug("files: {files}".format(files=files))
        page_count = 0
        remove_files = []

        for image_file in files:
            num = self._check_serial_number(image_file, self.__digits)
            if self._check_skip_file(image_file, self.__regex_ext, num):
                continue

            pdf_file = self._convert_image_to_pdf(image_file)
            if self._merge_pdf_file(pdf_file, filename):
                page_count += 1
                logger.info(
                    "Success write pdf for {page} page.".format(page=page_count)
                )
                if remove_flag:
                    remove_files.append(image_file)

        logger.info("-" * 55)
        if page_count == 0:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning("Target file doesn't exist... Finish.")
            return False

        logger.info(
            "All image file are converted. Filename: {filename}".format(
                filename=filename
            )
        )
        if self._remove_file_bulk(remove_files):
            logger.info("Post possess is finished")
        return True

    def _convert_image_to_pdf(self, file, resolution=100.0):
        """Convert an image file to a one-page pdf file.

        Args:
            file (str): Image file
            resolution (float): Pdf file resolution, default 100.

        Returns:
            str: Name of the generated pdf file.
        """
        # Swap only the trailing extension. A plain str.replace would also
        # rewrite the extension text elsewhere in the name and, when the
        # extension is not present verbatim, would leave the name untouched
        # so that the pdf overwrote the source image.
        pdf_file_name = os.path.splitext(file)[0] + ".pdf"
        # The context manager releases the image's file handle promptly.
        with PIL.Image.open(file) as source:
            source.convert("RGB").save(pdf_file_name, "PDF", resolution=resolution)
        return pdf_file_name

    @staticmethod
    def _check_image_extension(extension):
        """Check whether the extension is a supported image extension.

        Args:
            extension (str): Image file extension (with leading dot)

        Returns:
            bool: ``True`` if the extension is ".jpg", ".png" or ".gif".

        Raises:
            InvalidImageFileFormatError: If the extension is not one of the
                supported image extensions.
        """
        if extension not in (".jpg", ".png", ".gif"):
            raise InvalidImageFileFormatError()
        return True

    def _merge_pdf_file(self, pdf_file, filename):
        """Merge a one-page pdf file into the output pdf.

        Args:
            pdf_file (str): 1 page pdf file
            filename (str): Merge target pdf file name

        Returns:
            bool: If success, return true.
        """
        logger.debug(type(pdf_file))
        with open(pdf_file, "rb") as f:
            file_reader = PyPDF2.PdfFileReader(f)
            self.__file_writer.addPage(file_reader.getPage(0))
            logger.debug("Merge {pdf_file}".format(pdf_file=pdf_file))
            # Write while the source is still open: PyPDF2 reads the page's
            # objects from the stream lazily, at write time.
            self._write_pdf(filename)
        # The intermediate one-page pdf is no longer needed once merged.
        self._remove_file(pdf_file, assume_yes=True)
        return True

    def _write_pdf(self, file_name):
        """Write every page collected so far to the pdf file.

        Args:
            file_name (str): pdf file name.

        Returns:
            bool: If success, return true.
        """
        with open(file_name, "wb") as f:
            self.__file_writer.write(f)
        return True
|
153
|
|
|
|