silverstripe /
silverstripe-textextraction
| 1 | <?php |
||
| 2 | |||
| 3 | namespace SilverStripe\TextExtraction\Extension; |
||
| 4 | |||
| 5 | use SilverStripe\Assets\File; |
||
| 6 | use SilverStripe\ORM\DataExtension; |
||
| 7 | use SilverStripe\TextExtraction\Cache\FileTextCache; |
||
| 8 | use SilverStripe\TextExtraction\Extractor\FileTextExtractor; |
||
| 9 | |||
/**
 * Decorates File (or a File derivative) to enable text extraction from the file content. Uses a set of
 * subclasses of FileTextExtractor to do the extraction based on the content type of the file.
 *
 * Adds an additional property which holds the cached contents, and which is populated on demand.
 *
 * @author mstephens
 */
class FileTextExtractable extends DataExtension
{
    /**
     * Extra database field added to the owner, declared as a Text column named FileContentCache.
     *
     * @var array
     * @config
     */
    private static $db = [
        'FileContentCache' => 'Text'
    ];

    /**
     * Casting for the virtual FileContent field (served by {@see getFileContent()}).
     *
     * @var array
     * @config
     */
    private static $casting = [
        'FileContent' => 'Text'
    ];

    /**
     * Declares the TextCache dependency as a reference to the FileTextCache service.
     * NOTE(review): presumably resolved by the framework's injector through {@see setTextCache()} - confirm.
     *
     * @var array
     * @config
     */
    private static $dependencies = [
        'TextCache' => '%$' . FileTextCache::class,
    ];

    /**
     * Cache backend used to load, save and invalidate extracted text. Null until one is set.
     *
     * @var FileTextCache
     */
    protected $fileTextCache = null;

    /**
     * Sets the cache backend used for extracted text.
     *
     * @param FileTextCache $cache
     * @return $this
     */
    public function setTextCache(FileTextCache $cache)
    {
        $this->fileTextCache = $cache;
        return $this;
    }

    /**
     * Returns the cache backend used for extracted text (null if none has been set).
     *
     * @return FileTextCache
     */
    public function getTextCache()
    {
        return $this->fileTextCache;
    }

    /**
     * Helper function for templates: returns the extracted text, using the cache when possible.
     *
     * @return string
     */
    public function getFileContent()
    {
        return $this->extractFileAsText();
    }

    /**
     * Tries to parse the file contents if a FileTextExtractor class exists to handle the file type, and
     * returns the text. Unless the cache is disabled, the extracted value is also stored via the text cache.
     *
     * @param boolean $disableCache If false, the file content is only parsed on demand and the result is
     *                              cached. If true, the content parsing is forced, bypassing the cached
     *                              version (the cache is neither read nor written).
     * @return string|null The extracted text, or null when no extractor handles the file or no text
     *                     could be extracted.
     */
    public function extractFileAsText($disableCache = false)
    {
        /** @var File $file */
        $file = $this->owner;

        if (!$disableCache) {
            $text = $this->getTextCache()->load($file);
            // The string "0" is falsy in PHP but is legitimate cached content, so accept it explicitly.
            if ($text || $text === '0') {
                return $text;
            }
        }

        // Determine which extractor can process this file.
        $extractor = FileTextExtractor::for_file($file);
        if (!$extractor) {
            return null;
        }

        $text = $extractor->getContent($file);
        // Treat empty results as "no content", but keep a literal "0" (falsy, yet real text).
        if (!$text && $text !== '0') {
            return null;
        }

        if (!$disableCache) {
            $this->getTextCache()->save($file, $text);
        }

        return $text;
    }

    /**
     * Invalidates the owner's cached text before the owner is written.
     *
     * @return void
     */
    public function onBeforeWrite()
    {
        // Clear cache before changing file
        $this->getTextCache()->invalidate($this->owner);
    }
}
||
| 124 |