1 | <?php |
||
2 | |||
3 | namespace SilverStripe\TextExtraction\Extension; |
||
4 | |||
5 | use SilverStripe\Assets\File; |
||
6 | use SilverStripe\ORM\DataExtension; |
||
7 | use SilverStripe\TextExtraction\Cache\FileTextCache; |
||
8 | use SilverStripe\TextExtraction\Extractor\FileTextExtractor; |
||
9 | |||
/**
 * Decorate File or a File derivative to enable text extraction from the file content. Uses a set of
 * subclasses of FileTextExtractor to do the extraction based on the content type of the file.
 *
 * Adds an additional property which is the cached contents, which is populated on demand.
 *
 * @author mstephens
 */
class FileTextExtractable extends DataExtension
{
    /**
     * Additional field on the decorated record, used for the cached file contents.
     *
     * @var array
     * @config
     */
    private static $db = [
        'FileContentCache' => 'Text',
    ];

    /**
     * Casting for the value returned by {@see getFileContent()}.
     *
     * @var array
     * @config
     */
    private static $casting = [
        'FileContent' => 'Text',
    ];

    /**
     * Dependency definition for the text cache.
     *
     * NOTE(review): presumably resolved by the framework's injector and handed to
     * {@see setTextCache()} - confirm against the injector configuration.
     *
     * @var array
     * @config
     */
    private static $dependencies = [
        'TextCache' => '%$' . FileTextCache::class,
    ];

    /**
     * Cache backend used to load, save and invalidate extracted text.
     *
     * @var FileTextCache
     */
    protected $fileTextCache = null;

    /**
     * Assign the cache backend used for extracted text.
     *
     * @param FileTextCache $cache
     * @return $this
     */
    public function setTextCache(FileTextCache $cache)
    {
        $this->fileTextCache = $cache;

        return $this;
    }

    /**
     * Get the cache backend used for extracted text.
     *
     * @return FileTextCache
     */
    public function getTextCache()
    {
        return $this->fileTextCache;
    }

    /**
     * Helper function for template
     *
     * @return string|null The extracted text, as returned by {@see extractFileAsText()}
     */
    public function getFileContent()
    {
        return $this->extractFileAsText();
    }

    /**
     * Tries to parse the file contents if a FileTextExtractor class exists to handle the file type, and
     * returns the text. Unless the cache is disabled, the extracted value is also stored through the
     * configured text cache.
     *
     * @param boolean $disableCache If false, a cached value is returned when one is available and a
     *                              freshly extracted value is saved to the cache.
     *                              If true, the content parsing is forced: the cache is neither
     *                              read nor written.
     * @return string|null The extracted text, or null when no extractor handles the file or the
     *                     extractor produced no content
     */
    public function extractFileAsText($disableCache = false)
    {
        /** @var File $file */
        $file = $this->owner;
        $useCache = !$disableCache;

        if ($useCache) {
            $cached = $this->getTextCache()->load($file);
            if ($this->hasText($cached)) {
                return $cached;
            }
        }

        // Determine which extractor can process this file.
        $extractor = FileTextExtractor::for_file($file);
        if (!$extractor) {
            return null;
        }

        $text = $extractor->getContent($file);
        if (!$this->hasText($text)) {
            return null;
        }

        if ($useCache) {
            $this->getTextCache()->save($file, $text);
        }

        return $text;
    }

    /**
     * Invalidate the cached text for the owner before it is written.
     *
     * @return void
     */
    public function onBeforeWrite()
    {
        // Clear cache before changing file
        $this->getTextCache()->invalidate($this->owner);
    }

    /**
     * Whether a cached or extracted value counts as usable text.
     *
     * PHP treats the string "0" as falsy, so a plain truthiness check would discard a file whose
     * entire content is "0". Every other value keeps the usual truthiness semantics, so null,
     * false and the empty string are still treated as "no text".
     *
     * @param mixed $text
     * @return bool
     */
    private function hasText($text)
    {
        return $text === '0' || (bool) $text;
    }
}
||
124 |