1 | ## |
---|
2 | ## imagestorage.py |
---|
3 | ## Login : <uli@pu.smp.net> |
---|
4 | ## Started on Mon Jul 4 16:02:14 2011 Uli Fouquet |
---|
5 | ## $Id$ |
---|
6 | ## |
---|
7 | ## Copyright (C) 2011 Uli Fouquet |
---|
8 | ## This program is free software; you can redistribute it and/or modify |
---|
9 | ## it under the terms of the GNU General Public License as published by |
---|
10 | ## the Free Software Foundation; either version 2 of the License, or |
---|
11 | ## (at your option) any later version. |
---|
12 | ## |
---|
13 | ## This program is distributed in the hope that it will be useful, |
---|
14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
16 | ## GNU General Public License for more details. |
---|
17 | ## |
---|
18 | ## You should have received a copy of the GNU General Public License |
---|
19 | ## along with this program; if not, write to the Free Software |
---|
20 | ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
21 | ## |
---|
22 | """A storage for image files. |
---|
23 | """ |
---|
24 | import grok |
---|
25 | import hashlib |
---|
26 | import os |
---|
27 | import tempfile |
---|
28 | import transaction |
---|
29 | import warnings |
---|
30 | from StringIO import StringIO |
---|
31 | from ZODB.blob import Blob |
---|
32 | from persistent import Persistent |
---|
33 | from hurry.file import HurryFile |
---|
34 | from hurry.file.interfaces import IFileRetrieval |
---|
35 | from zope.component import queryUtility |
---|
36 | from zope.interface import Interface |
---|
37 | from waeup.sirp.image import WAeUPImageFile |
---|
38 | from waeup.sirp.interfaces import ( |
---|
39 | IFileStoreNameChooser, IExtFileStore, IFileStoreHandler,) |
---|
40 | from waeup.sirp.utils.helpers import cmp_files |
---|
41 | |
---|
def md5digest(fd, chunk_size=65536):
    """Get an MD5 hexdigest for the file stored in `fd`.

    `fd`
      a file object open for reading. It is read from its current
      position up to its end; the position is not restored.

    `chunk_size`
      number of bytes requested per `read` call. The file is hashed
      chunk by chunk so that large files are never held in memory as
      a whole.

    Returns the digest as a string of hexadecimal digits.
    """
    digest = hashlib.md5()
    while True:
        chunk = fd.read(chunk_size)
        if not chunk:
            # An empty read result signals end of file.
            break
        digest.update(chunk)
    return digest.hexdigest()
---|
50 | |
---|
class FileStoreNameChooser(grok.Adapter):
    """Fallback name chooser for objects put into a file store.

    Registered for any context (``Interface``) and provides
    ``IFileStoreNameChooser``.
    """
    grok.context(Interface)
    grok.implements(IFileStoreNameChooser)

    def checkName(self, name):
        """Check whether `name` is a valid object name.

        The interface contract is to raise a user error for invalid
        names. This implementation performs no check at all: every
        name is accepted and ``None`` is returned.
        """
        return None

    def chooseName(self, name):
        """Choose a name for the object.

        The passed-in `name` is ignored; the constant name
        ``u'unknown_file'`` is returned for every call. No error is
        raised.
        """
        return u'unknown_file'
---|
72 | |
---|
class Basket(grok.Container):
    """A basket holds a set of image files with same hash.

    Each file is stored as a ZODB blob under a basket-internal id,
    which is a decimal number given as string (``'1'``, ``'2'``, ...).
    """

    def _del(self):
        """Remove temporary files associated with local blobs.

        A basket holds files as Blob objects. Unfortunately, if a
        basket was not committed (put into ZODB), those blobs linger
        around as real files in some temporary directory and won't be
        removed.

        This is a helper function to remove all those uncommitted
        blobs that has to be called explicitly, for instance in tests.
        """
        # Iterate over a snapshot of the keys: entries are deleted
        # from the container inside the loop.
        for key in list(self.keys()):
            item = self[key]
            if getattr(item, '_p_oid', None):
                # Don't mess around with blobs in ZODB
                continue
            fd = item.open('r')
            name = getattr(fd, 'name', None)
            fd.close()
            if name is not None and os.path.exists(name):
                os.unlink(name)
            del self[key]
        return

    def getInternalId(self, fd):
        """Get the basket-internal id for the file stored in `fd`.

        `fd` must be a file open for reading. If an (byte-wise) equal
        file can be found in the basket, its internal id (basket id)
        is returned, ``None`` otherwise.

        If a stored blob turns out to be empty, it is refilled with
        the contents of `fd` and its id is returned.

        The file pointer of `fd` is moved by this method.
        """
        fd.seek(0)
        for key, val in self.items():
            fd_stored = val.open('r')
            try:
                stored_name = fd_stored.name
                is_empty = os.stat(stored_name).st_size == 0
                if not is_empty:
                    # Rewind both files before each comparison; an
                    # earlier comparison may have moved `fd`.
                    fd.seek(0)
                    fd_stored.seek(0)
                    if cmp_files(fd, fd_stored):
                        return key
            finally:
                # The reader must be closed in any case, especially
                # before the blob is reopened for writing below.
                fd_stored.close()
            if is_empty:
                # Nasty workaround. Blobs seem to suffer from being emptied
                # accidentally.
                site = grok.getSite()
                if site is not None:
                    site.logger.warn(
                        'Empty Blob detected: %s' % stored_name)
                warnings.warn("EMPTY BLOB DETECTED: %s" % stored_name)
                fd.seek(0)
                self._writeBlob(val, fd)
                return key
        return None

    @property
    def curr_id(self):
        """The current basket id.

        The lowest positive integer number (given as string) which is
        not yet in use as a key in this basket.
        """
        # Collect the used ids once instead of rescanning all keys
        # for every candidate number.
        taken = set(self.keys())
        num = 1
        while str(num) in taken:
            num += 1
        return str(num)

    def storeFile(self, fd, filename):
        """Store the file in `fd` into the basket.

        The file will be stored in a Blob, unless a byte-wise equal
        file is already contained in the basket. In both cases the
        basket-internal id of the file is returned and `fd` is left
        rewound to its beginning.

        `filename` is not used by this method.
        """
        fd.seek(0)
        internal_id = self.getInternalId(fd)  # Moves file pointer!
        if internal_id is None:
            internal_id = self.curr_id
            fd.seek(0)
            self[internal_id] = Blob()
            transaction.commit()  # Urgently needed to make the Blob
                                  # persistent. Took me ages to find
                                  # out that solution, which makes some
                                  # design flaw in ZODB Blobs likely.
            self._writeBlob(self[internal_id], fd)
            self._p_changed = True
        fd.seek(0)
        return internal_id

    def retrieveFile(self, basket_id):
        """Retrieve a file open for reading with basket id `basket_id`.

        If there is no such id, ``None`` is returned. It is the
        callers responsibility to close the open file.
        """
        if basket_id in self.keys():
            return self[basket_id].open('r')
        return None

    def _writeBlob(self, blob, fd):
        """Write the remaining contents of `fd` into `blob`.

        The blob file is closed explicitly after writing, also if
        reading or writing fails.
        """
        blob_file = blob.open('w')
        try:
            blob_file.write(fd.read())
        finally:
            blob_file.close()
---|
179 | |
---|
class ImageStorage(grok.Container):
    """A container for image files.

    Files are kept in :class:`Basket` objects, one basket per MD5
    hexdigest of the file contents. A file is addressed by a file id
    of form ``<digest>-<basket_id>``.
    """
    def _del(self):
        """Remove uncommitted blob files of all baskets (best effort).

        Calls `_del` of each contained basket. A basket that fails to
        clean up does not stop the cleanup of the remaining baskets.
        """
        for basket in self.values():
            try:
                basket._del()
            except Exception:
                # Deliberately ignored: cleanup is best effort. Only
                # regular errors are swallowed, so that
                # KeyboardInterrupt and SystemExit still get through.
                pass

    def storeFile(self, fd, filename):
        """Store the file in `fd` and return its file id.

        The file is put into the basket named after the MD5 hexdigest
        of its contents; the basket is created if it does not exist
        yet. The returned id has the form ``<digest>-<basket_id>``.
        """
        fd.seek(0)
        digest = md5digest(fd)
        fd.seek(0)
        if digest not in self.keys():
            self[digest] = Basket()
        basket_id = self[digest].storeFile(fd, filename)
        return "%s-%s" % (digest, basket_id)

    def retrieveFile(self, file_id):
        """Retrieve the file stored under `file_id`, open for reading.

        `file_id` is split at the first ``-`` into digest and basket
        id. ``None`` is returned if `file_id` contains no ``-`` or if
        there is no basket for the digest. Otherwise the result of the
        basket's `retrieveFile` is returned.
        """
        if '-' not in file_id:
            return None
        digest, basket_id = file_id.split('-', 1)
        if digest not in self.keys():
            return None
        return self[digest].retrieveFile(basket_id)
---|
207 | |
---|
class ImageStorageFileRetrieval(Persistent):
    """File retrieval backed by the image storage of the current site.

    The image storage is looked up as item ``'images'`` of the current
    site. If no site or no such item is available, file data is
    handled directly instead of being put into a storage.
    """
    grok.implements(IFileRetrieval)

    def getImageStorage(self):
        """Get the ``'images'`` item of the current site.

        Returns ``None`` if there is no current site or the site has
        no such item.
        """
        site = grok.getSite()
        if site is None:
            return None
        return site.get('images', None)

    def isImageStorageEnabled(self):
        """Tell whether an image storage can be found (``True``/``False``).
        """
        return self.getImageStorage() is not None

    def getFile(self, data):
        """Get a file-like object for `data`.

        `data` is tried as a file id in the image storage. If no
        storage is enabled or the storage does not return a file for
        it, `data` is wrapped into a ``StringIO`` and returned as
        file contents instead.

        Raises :exc:`ValueError` if the storage is reported as enabled
        but cannot be retrieved.
        """
        # ImageStorage is disabled, so give fall-back behaviour for
        # testing without ImageStorage
        if not self.isImageStorageEnabled():
            return StringIO(data)
        storage = self.getImageStorage()
        if storage is None:
            raise ValueError('Cannot find an image storage')
        result = storage.retrieveFile(data)
        if result is None:
            return StringIO(data)
        # Hand out the file already opened; retrieving it a second
        # time would leave this first file object open and unused.
        return result

    def createFile(self, filename, f):
        """Create a ``WAeUPImageFile`` for the contents of file `f`.

        With an image storage enabled, `f` is stored there and the
        returned object carries the resulting file id as data.
        Otherwise the returned object carries the file contents read
        from `f`.

        Raises :exc:`ValueError` if the storage is reported as enabled
        but cannot be retrieved.
        """
        if not self.isImageStorageEnabled():
            return WAeUPImageFile(filename, f.read())
        storage = self.getImageStorage()
        if storage is None:
            raise ValueError('Cannot find an image storage')
        file_id = storage.storeFile(f, filename)
        return WAeUPImageFile(filename, file_id)
---|
246 | |
---|
247 | |
---|
class ExtFileStore(object):
    """External file store.

    External file stores are meant to store files 'externally' of the
    ZODB, i.e. in filesystem.

    Most important attribute of the external file store is the `root`
    path which gives the path to the location where files will be
    stored within.

    By default `root` is a ``'media/'`` directory in the root of the
    datacenter root of a site.

    The `root` attribute is 'read-only' because you normally don't
    want to change this path -- it is dynamic. That means, if you call
    the file store from 'within' a site, the root path will be located
    inside this site (a :class:`waeup.sirp.University` instance). If
    you call it from 'outside' a site some temporary dir (always the
    same during lifetime of the file store instance) will be used. The
    term 'temporary' tells what you can expect from this path
    persistence-wise.

    If you insist, you can pass a root path on initialization to the
    constructor but when calling from within a site afterwards, the
    site will override your setting for security measures. This way
    you can safely use one file store for different sites in a Zope
    instance simultaneously and files from one site won't show up in
    another.

    An ExtFileStore instance is available as a global utility
    implementing :iface:`waeup.sirp.interfaces.IExtFileStore`.

    To add and retrieve files from the storage, use the appropriate
    methods below.
    """

    grok.implements(IExtFileStore)

    # Root path used when no site is available. Set by the
    # constructor or lazily by `root`.
    _root = None

    @property
    def root(self):
        """Root dir of this storage.

        The root dir is a readonly value determined dynamically. It
        holds media files for sites or other components.

        If a site is available we return a ``media/`` dir in the
        datacenter storage dir.

        Otherwise we create a temporary dir which will be remembered
        on next call.

        If a site exists and has a datacenter, it has always
        precedence over temporary dirs, also after a temporary
        directory was created.

        Please note that retrieving `root` is expensive. You might
        want to store a copy once retrieved in order to minimize the
        number of calls to `root`.

        """
        site = grok.getSite()
        if site is not None:
            return os.path.join(site['datacenter'].storage, 'media')
        if self._root is None:
            self._root = tempfile.mkdtemp()
        return self._root

    def __init__(self, root=None):
        """Create a file store.

        `root` is the root path to use while no site is available. If
        it is ``None``, a temporary directory is created on first
        access of the `root` attribute.
        """
        self._root = root
        return

    def getFile(self, file_id):
        """Get a file stored under file ID `file_id`.

        The path is computed by the ``IFileStoreHandler`` utility
        registered under the marker name found in `file_id`, or by a
        :class:`DefaultFileStoreHandler` if there is no such utility.

        Returns the file opened for reading in binary mode; closing it
        is up to the caller. If the file cannot be found ``None`` is
        returned.
        """
        marker = self.extractMarker(file_id)[0]
        handler = queryUtility(IFileStoreHandler, name=marker,
                               default=DefaultFileStoreHandler())
        path = handler.pathFromFileID(self, self.root, file_id)
        if not os.path.exists(path):
            return None
        return open(path, 'rb')

    def createFile(self, filename, f):
        """Store a file.

        `filename` is the file id (optionally with a leading marker,
        see `extractMarker`), `f` a file open for reading.

        The handler registered for the marker (or a
        :class:`DefaultFileStoreHandler`) decides about the path to
        store to and about the object returned. Missing directories
        of the path are created.

        Returns the file object created by the handler.
        """
        file_id = filename
        root = self.root  # Calls to self.root are expensive
        marker, filename, base, ext = self.extractMarker(file_id)
        handler = queryUtility(IFileStoreHandler, name=marker,
                               default=DefaultFileStoreHandler())
        # NOTE(review): `file_id` and `filename` are passed in a
        # different order than the parameters of
        # `DefaultFileStoreHandler.createFile` are named -- confirm
        # the intended order against `IFileStoreHandler`.
        f, path, file_obj = handler.createFile(
            self, root, file_id, filename, f)
        dirname = os.path.dirname(path)
        try:
            os.makedirs(dirname, 0o755)
        except OSError:
            # The directory may exist already or may have been created
            # by someone else in the meantime. Everything else is a
            # real error.
            if not os.path.isdir(dirname):
                raise
        with open(path, 'wb') as target:
            target.write(f.read())
        return file_obj

    def extractMarker(self, file_id):
        """split filename into marker, filename, basename, and extension.

        A marker is a leading part of a string of form
        ``__MARKERNAME__`` followed by the real filename. This way we
        can put markers into a filename to request special processing.

        Returns a quadruple

          ``(marker, filename, basename, extension)``

        where ``marker`` is the marker in lowercase, filename is the
        complete trailing real filename, ``basename`` is the basename
        of the filename and ``extension`` the filename extension of
        the trailing filename. See examples below.

        Example:

           >>> extractMarker('__MaRkEr__sample.jpg')
           ('marker', 'sample.jpg', 'sample', '.jpg')

        If no marker is contained, we assume the whole string to be a
        real filename:

           >>> extractMarker('no-marker.txt')
           ('', 'no-marker.txt', 'no-marker', '.txt')

        Filenames without extension give an empty extension string:

           >>> extractMarker('no-marker')
           ('', 'no-marker', 'no-marker', '')

        Empty strings and values that are no strings give a quadruple
        of empty strings.

        """
        if not isinstance(file_id, basestring) or not file_id:
            return ('', '', '', '')
        parts = file_id.split('__', 2)
        marker = ''
        # A marker is present only if the string starts with ``__``
        # and contains a second ``__`` terminating the marker name.
        if len(parts) == 3 and parts[0] == '':
            marker = parts[1].lower()
            file_id = parts[2]
        basename, ext = os.path.splitext(file_id)
        return (marker, file_id, basename, ext)
---|
394 | |
---|
# Register ExtFileStore as global utility providing IExtFileStore.
grok.global_utility(ExtFileStore, provides=IExtFileStore)

class DefaultStorage(ExtFileStore):
    """An :class:`ExtFileStore` declared to provide ``IFileRetrieval``.

    Adds no behaviour of its own; all methods are inherited from
    :class:`ExtFileStore`.
    """
    grok.provides(IFileRetrieval)

# Register DefaultStorage as global utility providing IFileRetrieval.
# NOTE(review): presumably this is the lookup `hurry.file` uses to
# find its file retrieval -- confirm.
grok.global_utility(DefaultStorage, provides=IFileRetrieval)
---|
401 | |
---|
class DefaultFileStoreHandler(grok.GlobalUtility):
    """File store handler used when no handler matches a marker.

    Provides ``IFileStoreHandler``. Files are located directly below
    the root directory of a store, named after their file id.
    """
    grok.implements(IFileStoreHandler)

    def pathFromFileID(self, store, root, file_id):
        """Get the path for `file_id`: `file_id` joined onto `root`.

        `store` is not used by this handler.
        """
        return os.path.join(root, file_id)

    def createFile(self, store, root, filename, file_id, f):
        """Prepare storing file `f` under `file_id`.

        Nothing is written here. Returns a triple ``(f, path,
        file_obj)`` with `f` passed through unchanged, ``path`` the
        path computed by `pathFromFileID` and ``file_obj`` a
        ``WAeUPImageFile`` created from `filename` and `file_id`.

        NOTE(review): :meth:`ExtFileStore.createFile` passes its
        arguments as ``(store, root, file_id, filename, f)``, i.e.
        with `filename` and `file_id` swapped compared to this
        signature -- confirm which order is intended.
        """
        path = self.pathFromFileID(store, root, file_id)
        return f, path, WAeUPImageFile(filename, file_id)
---|