1
|
|
|
# -*- coding: utf-8 -*- |
2
|
1 |
|
"""Convert file format. |
3
|
|
|
""" |
4
|
1 |
|
import os |
5
|
1 |
|
import re |
6
|
|
|
|
7
|
1 |
|
import PIL.Image |
8
|
1 |
|
import PyPDF2 |
9
|
|
|
|
10
|
1 |
|
from .core import Common |
11
|
1 |
|
from .exceptions import InvalidImageFileFormatError |
12
|
1 |
|
from .utils.logging import get_logger |
13
|
|
|
|
14
|
1 |
|
logger = get_logger("image2pdf") |
15
|
|
|
|
16
|
|
|
|
17
|
1 |
|
class Image2PDF(Common):
    """Build a single PDF file (e.g. an e-book) from image files.

    Every image in the target directory that is not skipped is converted
    to a one-page PDF, appended to a shared ``PyPDF2`` writer and the
    merged document is written to the requested output file.
    """

    def __init__(self, digits, extension, directory_path=None):
        """Store the conversion settings and move into the target directory.

        Args:
            digits (str): Regex target digit.
            extension (str): Target file extension.
            directory_path (str): Target directory path. When ``None`` the
                current working directory is used.
        """
        super().__init__()
        self.__digits = digits
        self.__extension = self._convert_extension_with_dot(extension)
        # Escape the extension so that its "." is matched literally instead
        # of acting as the regex "any character" wildcard.
        self.__regex_ext = re.compile(re.escape(self.__extension))
        self.__file_writer = PyPDF2.PdfFileWriter()
        if directory_path is not None:
            self.__directory_path = directory_path
        else:
            self.__directory_path = os.getcwd()
        logger.debug("Current Directory: {cwd}".format(cwd=self.__directory_path))
        # NOTE: this changes the working directory of the whole process;
        # later file names are resolved relative to it.
        os.chdir(self.__directory_path)

    def make_pdf(self, filename, remove_flag=False):
        """Make one pdf file out of the image files in the target directory.

        Make pdf file which you use e-books by take in some image
        files such as jpeg, png and gif.

        Args:
            filename (str): pdf file name
            remove_flag (bool): If true, the converted image files are
                handed to ``_remove_file_bulk`` after the merge.

        Returns:
            bool: True when at least one page was written, False when no
            file was converted.

        Raises:
            InvalidImageFileFormatError: If the configured extension is not
                a supported image extension.
        """
        self._check_image_extension(self.__extension)

        files = self._make_file_list(self.__directory_path, sort=True)
        logger.debug("files: {files}".format(files=files))
        page_count = 0
        remove_files = []

        for file in files:
            num = self._check_serial_number(file, self.__digits)
            if self._check_skip_file(file, self.__regex_ext, num):
                continue

            pdf_file = self._convert_image_to_pdf(file)

            if self._merge_pdf_file(pdf_file, filename):
                logger.info(
                    "Success write pdf for {page} page.".format(page=page_count + 1)
                )
                page_count += 1
                if remove_flag:
                    remove_files.append(file)
        logger.info("-" * 55)
        if page_count == 0:
            # Logger.warn is a deprecated alias; use warning().
            logger.warning("Target file doesn't exist... Finish.")
            return False
        logger.info(
            "All image file are converted. Filename: {filename}".format(
                filename=filename
            )
        )
        if self._remove_file_bulk(remove_files):
            logger.info("Post possess is finished")
        return True

    def _convert_image_to_pdf(self, file, resolution=100.0):
        """Convert an image file to a one-page pdf file.

        Args:
            file (str): Image file
            resolution (float): Pdf file resolution, default 100.

        Returns:
            str: Name of the pdf file that was written.
        """
        # Swap only the trailing extension. str.replace() would rewrite
        # every occurrence in the name and, when the extension is absent,
        # would return the image's own name and overwrite the source file.
        pdf_file_name = os.path.splitext(file)[0] + ".pdf"
        # The context manager closes the underlying image file handle.
        with PIL.Image.open(file) as source:
            source.convert("RGB").save(pdf_file_name, "PDF", resolution=resolution)
        return pdf_file_name

    @staticmethod
    def _check_image_extension(extension):
        """Check that the extension is a supported image file extension.

        Args:
            extension (str): Image file extension, including the dot.

        Returns:
            bool: If extension is image file, return true.

        Raises:
            InvalidImageFileFormatError: If extension is not image file.
        """
        if extension not in (".jpg", ".png", ".gif"):
            raise InvalidImageFileFormatError()
        return True

    def _merge_pdf_file(self, pdf_file, filename):
        """Merge a one-page pdf file into the output pdf file.

        The first page of ``pdf_file`` is appended to the shared writer, the
        merged document is written to ``filename`` and the one-page file is
        then removed.

        Args:
            pdf_file (str): 1 page pdf file
            filename (str): Merge target pdf file name

        Returns:
            bool: If success, return true.
        """
        logger.debug(type(pdf_file))
        with open(pdf_file, "rb") as f:
            file_reader = PyPDF2.PdfFileReader(f)
            self.__file_writer.addPage(file_reader.getPage(0))
            logger.debug("Merge {pdf_file}".format(pdf_file=pdf_file))
            # Write while the source stream is still open: the page content
            # is read from it when the writer serialises the document.
            self._write_pdf(filename)
        # Remove the temporary page only after its handle has been closed.
        self._remove_file(pdf_file, assume_yes=True)
        return True

    def _write_pdf(self, file_name):
        """Write every page collected so far to a pdf file.

        Args:
            file_name (str): pdf file name.

        Returns:
            bool: If success, return true.
        """
        with open(file_name, "wb") as f:
            self.__file_writer.write(f)
        return True
153
|
|
|
|