ExtractingQuery - Code Metrics - Inspection of "[TASK] Documentation Upmerge from 6.1" - TYPO3-Solr/ext-solr - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#1249)

unknown

created 2017-04-06 10:23 UTC

ExtractingQuery A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	123
Duplicated Lines	0 %

Coupling/Cohesion

Components	1
Dependencies	1

Test Coverage

Coverage

69.77%

Importance

Changes

Metric	Value
wmc	12
lcom	1
cbo	1
dl	0
loc	123
ccs	30
cts	43
cp	0.6977
rs	10
c	0
b	0
f	0

8 Methods

Rating	Name	Size	Complexity
A	__construct()	7	1
A	getMultiPartPostDataBoundary()	4	1
A	getFile()	4	1
A	setFile()	6	2
A	getFileName()	4	1
A	getRawPostFileData()	22	3
A	setExtractOnly()	8	2
A	getQueryParameters()	13	1

<?php
namespace ApacheSolrForTypo3\Solr;

/***************************************************************
 *  Copyright notice
 *
 *  (c) 2010-2015 Ingo Renner <[email protected]>
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
 *  free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  The GNU General Public License can be found at
 *  http://www.gnu.org/copyleft/gpl.html.
 *
 *  This script is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  This copyright notice MUST APPEAR in all copies of the script!
 ***************************************************************/

/**
 * Specialized query for content extraction using Solr Cell.
 *
 * Wraps a single file and provides both the multi-part form-data POST body
 * built from the file's content and the query parameters to send along
 * with the extraction request.
 */
class ExtractingQuery extends Query
{
    /**
     * Path to the file to extract content and meta data from.
     *
     * @var string
     */
    protected $file;

    /**
     * Default boundary for the multi-part form-data POST body. Generated
     * once in the constructor and used whenever no boundary is passed to
     * getRawPostFileData().
     *
     * @var string
     */
    protected $multiPartPostDataBoundary;

    /**
     * Constructor
     *
     * Note: unlike setFile(), the constructor stores the path as given,
     * without checking that it points to an existing file.
     *
     * @param string $file Absolute path to the file to extract content and meta data from.
     */
    public function __construct($file)
    {
        parent::__construct('');

        $this->file = $file;
        $this->multiPartPostDataBoundary = '--' . md5(uniqid(time()));
    }

    /**
     * Returns the boundary used for this multi-part form-data POST body data.
     *
     * @return string multi-part form-data POST boundary
     */
    public function getMultiPartPostDataBoundary()
    {
        return $this->multiPartPostDataBoundary;
    }

    /**
     * Gets the absolute path to the file to extract content and meta data from.
     *
     * @return string Absolute path to the file to extract content and meta data from.
     */
    public function getFile()
    {
        return $this->file;
    }

    /**
     * Sets the absolute path to the file to extract content and meta data from.
     *
     * A path that is_file() does not accept is silently ignored; the
     * previously set file stays in place.
     *
     * @param string $file Absolute path to the file to extract content and meta data from.
     */
    public function setFile($file)
    {
        if (is_file($file)) {
            $this->file = $file;
        }
    }

    /**
     * Gets the filename portion of the file.
     *
     * @return string The filename.
     */
    public function getFileName()
    {
        return basename($this->file);
    }

    /**
     * Constructs a multi-part form-data POST body from the file's content.
     *
     * @param string $boundary Optional boundary to use, falls back to the boundary generated for this query when empty
     * @return string The file to extract as raw POST data.
     * @throws \Apache_Solr_InvalidArgumentException if the file's content could not be read
     */
    public function getRawPostFileData($boundary = '')
    {
        // Compare against the empty string instead of using empty():
        // empty('0') is TRUE and would silently discard a caller-supplied
        // boundary of "0".
        if ((string)$boundary === '') {
            $boundary = $this->multiPartPostDataBoundary;
        }

        $fileData = file_get_contents($this->file);
        if ($fileData === false) {
            throw new \Apache_Solr_InvalidArgumentException(
                'Could not retrieve content from file ' . $this->file
            );
        }

        $data = "--{$boundary}\r\n";
        // The 'filename' used here becomes the property name in the response.
        $data .= 'Content-Disposition: form-data; name="file"; filename="extracted"';
        $data .= "\r\nContent-Type: application/octet-stream\r\n\r\n";
        $data .= $fileData;
        // Closing delimiter: the boundary followed by two additional dashes.
        $data .= "\r\n--{$boundary}--\r\n";

        return $data;
    }

    /**
     * En / Disables extraction only
     *
     * Enabling sets the 'extractOnly' query parameter to the string 'true',
     * disabling removes the parameter again.
     *
     * @param bool $extractOnly If TRUE, only extracts content from the given file without indexing
     */
    public function setExtractOnly($extractOnly = true)
    {
        if ($extractOnly) {
            $this->queryParameters['extractOnly'] = 'true';
        } else {
            unset($this->queryParameters['extractOnly']);
        }
    }

    /**
     * Builds an array of query parameters to use for the extraction request.
     *
     * The extraction defaults are merged with the parameters set on this
     * query; on identical keys the explicitly set parameters win.
     *
     * @return array An array ready to use with query parameters
     */
    public function getQueryParameters()
    {
        // TODO create an Apache Solr patch to support Apache Tika's -m (and -l) options
        $extractParameters = [
            'resource.name' => $this->getFileName(),
            // Matches the -t command for the tika CLI app.
            'extractFormat' => 'text',
        ];

        return array_merge($extractParameters, $this->queryParameters);
    }
}


1		<?php
2		namespace ApacheSolrForTypo3\Solr;
3
4		/***************************************************************
5		* Copyright notice
6		*
7		* (c) 2010-2015 Ingo Renner <[email protected]>
8		* All rights reserved
9		*
10		* This script is part of the TYPO3 project. The TYPO3 project is
11		* free software; you can redistribute it and/or modify
12		* it under the terms of the GNU General Public License as published by
13		* the Free Software Foundation; either version 2 of the License, or
14		* (at your option) any later version.
15		*
16		* The GNU General Public License can be found at
17		* http://www.gnu.org/copyleft/gpl.html.
18		*
19		* This script is distributed in the hope that it will be useful,
20		* but WITHOUT ANY WARRANTY; without even the implied warranty of
21		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22		* GNU General Public License for more details.
23		*
24		* This copyright notice MUST APPEAR in all copies of the script!
25		***************************************************************/
26
27		/**
28		* Specialized query for content extraction using Solr Cell
29		*
30		*/
31		class ExtractingQuery extends Query
32		{
33		protected $file;
34		protected $multiPartPostDataBoundary;
35
36		/**
37		* Constructor
38		*
39		* @param string $file Absolute path to the file to extract content and meta data from.
40		*/
41	1	public function __construct($file)
42		{
43	1	parent::__construct('');
44
45	1	$this->file = $file;
46	1	$this->multiPartPostDataBoundary = '--' . md5(uniqid(time()));
47	1	}
48
49		/**
50		* Returns the boundary used for this multi-part form-data POST body data.
51		*
52		* @return string multi-part form-data POST boundary
53		*/
54	1	public function getMultiPartPostDataBoundary()
55		{
56	1	return $this->multiPartPostDataBoundary;
57		}
58
59		/**
60		* Gets the absolute path to the file to extract content and meta data from.
61		*
62		* @return string Absolute path to the file to extract content and meta data from.
63		*/
64		public function getFile()
65		{
66		return $this->file;
67		}
68
69		/**
70		* Sets the absolute path to the file to extract content and meta data from.
71		*
72		* @param string $file Absolute path to the file to extract content and meta data from.
73		*/
74		public function setFile($file)
75		{
76		if (is_file($file)) {
77		$this->file = $file;
78		}
79		}
80
81		/**
82		* Gets the filename portion of the file.
83		*
84		* @return string The filename.
85		*/
86		public function getFileName()
87		{
88		return basename($this->file);
89		}
90
91		/**
92		* Constructs a multi-part form-data POST body from the file's content.
93		*
94		* @param string $boundary Optional boundary to use
95		* @return string The file to extract as raw POST data.
96		* @throws \Apache_Solr_InvalidArgumentException
97		*/
98	1	public function getRawPostFileData($boundary = '')
99		{
100	1	if (empty($boundary)) {
101	1	$boundary = $this->multiPartPostDataBoundary;
102	1	}
103
104	1	$fileData = file_get_contents($this->file);
105	1	if ($fileData === false) {
106		throw new \Apache_Solr_InvalidArgumentException(
107		'Could not retrieve content from file ' . $this->file
108		);
109		}
110
111	1	$data = "--{$boundary}\r\n";
112		// The 'filename' used here becomes the property name in the response.
113	1	$data .= 'Content-Disposition: form-data; name="file"; filename="extracted"';
114	1	$data .= "\r\nContent-Type: application/octet-stream\r\n\r\n";
115	1	$data .= $fileData;
116	1	$data .= "\r\n--{$boundary}--\r\n";
117
118	1	return $data;
119		}
120
121		/**
122		* En / Disables extraction only
123		*
124		* @param bool $extractOnly If TRUE, only extracts content from the given file without indexing
125		*/
126	1	public function setExtractOnly($extractOnly = true)
127		{
128	1	if ($extractOnly) {
129	1	$this->queryParameters['extractOnly'] = 'true';
130	1	} else {
131		unset($this->queryParameters['extractOnly']);
132		}
133	1	}
134
135		/**
136		* Builds an array of query parameters to use for the search query.
137		*
138		* @return array An array ready to use with query parameters
139		*/
140	1	public function getQueryParameters()
141		{
142	1	$filename = basename($this->file);
143
144		// TODO create an Apache Solr patch to support Apache Tika's -m (and -l) options
145		$suggestParameters = [
146	1	'resource.name' => $filename,
147	1	'extractFormat' => 'text',
148		// Matches the -t command for the tika CLI app.
149	1	];
150
151	1	return array_merge($suggestParameters, $this->queryParameters);
152		}
153		}
154

TYPO3-Solr / ext-solr

Pull Request — master (#1249)

ExtractingQuery A

Complexity

Size/Duplication

Coupling/Cohesion

Test Coverage

Importance

8 Methods

Duplication Side-by-Side

Filter issues like