PDFConverter   A
last analyzed

Complexity

Total Complexity 11

Size/Duplication

Total Lines 73
Duplicated Lines 0 %

Importance

Changes 2
Bugs 0 Features 1
Metric Value
c 2
b 0
f 1
dl 0
loc 73
rs 10
wmc 11

3 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 10 1
A finalize() 0 5 1
D execute() 0 52 9
""" Package to convert PDF to PNG

.. Authors:
    Philippe Dessauw
    [email protected]

.. Sponsor:
    Alden Dima
    [email protected]
    Information Systems Group
    Software and Systems Division
    Information Technology Laboratory
    National Institute of Standards and Technology
    http://www.nist.gov/itl/ssd/is
"""
from os import listdir
17
from os.path import join, isfile, dirname, splitext, basename, isdir
18
import PyPDF2
19
import PythonMagick
20
from pipeline.command import Command
21
22
23
class PDFConverter(Command):
    """ Command to convert PDF to PNG.

    The command expects exactly one ``.pdf`` file in the ``self.unzipped``
    directory and writes one PNG per page into a ``png`` directory located
    next to that PDF file.

    NOTE(review): ``self.unzipped`` and ``self.logger`` are not assigned in
    this class; presumably the ``Command`` base class sets them -- confirm.
    """

    def __init__(self, filename, logger, config):
        """ Read the rendering settings from the configuration.

        Parameters:
            filename: Forwarded unchanged to ``Command.__init__``.
            logger: Forwarded unchanged to ``Command.__init__``.
            config: Mapping providing ``config["command"]["density"]``,
                ``config["command"]["depth"]`` and
                ``config["command"]["quality"]``.
        """
        super(PDFConverter, self).__init__(filename, logger, config)

        command_config = config["command"]
        self.density = command_config["density"]
        self.depth = command_config["depth"]
        self.quality = command_config["quality"]

        self.logger.debug("PDF converter {density: " + str(self.density)
                          + "; depth: " + str(self.depth)
                          + "; quality: " + str(self.quality) + "}")

    def execute(self):
        """ Execute the command

        Returns:
            int: 0 when every page has been converted, 1 when the directory
            does not hold exactly one PDF file or when converting a page
            raised an exception, 2 when ``self.unzipped`` is not a directory.
        """
        self.logger.debug(":::    PDF conversion (%s)    :::" % str(self.unzipped))

        # This early exit does not call finalize(), unlike the other ones.
        if not isdir(self.unzipped):
            self.logger.error("%s is not a directory" % self.unzipped)
            return 2

        self.logger.debug("Browsing %s for pdf files..." % self.unzipped)
        pdf_list = self._list_pdf_files()

        if len(pdf_list) != 1:
            self.logger.error("Incorrect number of PDF file in " + self.unzipped
                              + " (" + str(len(pdf_list)) + " found, 1 expected)")
            self.finalize()
            return 1

        filename = str(pdf_list[0])

        # The page count is read with PyPDF2; errors raised here are not
        # caught and propagate to the caller.
        with open(filename, "rb") as pdf:
            pdf_page_nb = PyPDF2.PdfFileReader(pdf).getNumPages()

        # The output directory is the same for every page: compute it once.
        png_dirname = join(dirname(filename), "png")

        self.logger.debug(str(pdf_page_nb) + " page(s) detected")

        # range() (not xrange) keeps the loop valid on Python 2 and Python 3.
        for page in range(pdf_page_nb):
            try:
                self._convert_page(filename, page, png_dirname)
            except Exception as e:
                # Deliberately broad: any failure on a page aborts the whole
                # conversion. str(e) is used instead of e.message, which is
                # empty for multi-argument exceptions and absent on Python 3.
                self.logger.fatal("An exception has been caugth: " + str(e))
                self.finalize()
                return 1

        self.finalize()
        return 0

    def _list_pdf_files(self):
        """ Return the paths of the regular ``.pdf`` files in ``self.unzipped``.
        """
        return [join(self.unzipped, f) for f in listdir(self.unzipped)
                if isfile(join(self.unzipped, f)) and f.endswith(".pdf")]

    def _convert_page(self, filename, page, png_dirname):
        """ Render one page of the PDF ``filename`` to a PNG file.

        Parameters:
            filename: Path of the PDF file.
            page: Zero-based index of the page to render.
            png_dirname: Directory receiving ``<pdf name>-<page>.png``.
        """
        img = PythonMagick.Image()
        img.density(str(self.density))  # the density is handed over as a string
        img.depth(self.depth)
        img.quality(self.quality)

        # "<file>[<index>]" asks the image library for a single page.
        pdf_page_file = filename + '[' + str(page) + ']'
        self.logger.debug("Reading " + pdf_page_file + "...")
        img.read(pdf_page_file)

        png_filename = splitext(basename(filename))[0] + '-' + str(page) + '.png'
        png_page_file = join(png_dirname, png_filename)
        self.logger.debug("Writing " + png_page_file + "...")
        img.write(png_page_file)

    def finalize(self):
        """ Finalize the job

        Only logs the end of the conversion.
        """
        # super(PDFConverter, self).store_file()
        self.logger.debug("::: PDF conversion END (%s) :::" % str(self.unzipped))
96