##
## imagestorage.py
## Login :
## Started on  Mon Jul  4 16:02:14 2011 Uli Fouquet
## $Id$
##
## Copyright (C) 2011 Uli Fouquet
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
"""A storage for image files.

Files are grouped by the MD5 digest of their content: an
:class:`ImageStorage` maps each digest to a :class:`Basket`, and a
basket holds one ZODB Blob per distinct file with that digest. A
stored file is addressed by the id ``<md5 hexdigest>-<basket id>``.
"""
import grok
import hashlib
import os
import transaction
import warnings
from StringIO import StringIO
from ZODB.blob import Blob
from persistent import Persistent
from hurry.file.interfaces import IFileRetrieval
from waeup.sirp.image import WAeUPImageFile
from waeup.sirp.utils.helpers import cmp_files


def md5digest(fd):
    """Get an MD5 hexdigest for the file stored in `fd`.

    `fd`
      a file object open for reading.

    The file is read from its current position up to the end and is
    not rewound afterwards; callers seek before and after as needed.
    """
    return hashlib.md5(fd.read()).hexdigest()


class Basket(grok.Container):
    """A basket holds a set of image files with same hash.

    Each file is kept as a Blob under a string key ("1", "2", ...),
    the basket-internal id.
    """

    def _del(self):
        """Remove temporary files associated with local blobs.

        A basket holds files as Blob objects. Unfortunately, if a
        basket was not committed (put into ZODB), those blobs linger
        around as real files in some temporary directory and won't be
        removed.

        This is a helper function to remove all those uncommitted
        blobs that has to be called explicitly, for instance in tests.
        """
        # Iterate over a copy of the keys: entries are deleted below.
        for key in list(self.keys()):
            item = self[key]
            if getattr(item, '_p_oid', None):
                # Don't mess around with blobs in ZODB
                continue
            # The blob is opened only to learn the path of its
            # backing file.
            fd = item.open('r')
            try:
                name = getattr(fd, 'name', None)
            finally:
                fd.close()
            if name is not None and os.path.exists(name):
                os.unlink(name)
            del self[key]
        return

    def getInternalId(self, fd):
        """Get the basket-internal id for the file stored in `fd`.

        `fd` must be a file open for reading. If an (byte-wise) equal
        file can be found in the basket, its internal id (basket id)
        is returned, ``None`` otherwise.

        As a workaround for blobs that were emptied accidentally, the
        first stored blob found to have zero length is refilled with
        the contents of `fd` (a warning is logged and emitted) and its
        id is returned.

        The file pointer of `fd` is moved by this method.
        """
        for key, val in self.items():
            fd_stored = val.open('r')
            try:
                blob_path = fd_stored.name
                is_empty = os.stat(blob_path)[6] == 0  # [6] is st_size
                if not is_empty:
                    # Rewind both files for every comparison; a
                    # previous comparison may have moved the pointers.
                    fd.seek(0)
                    fd_stored.seek(0)
                    if cmp_files(fd, fd_stored):
                        return key
            finally:
                fd_stored.close()
            if not is_empty:
                continue
            # Nasty workaround. Blobs seem to suffer from being emptied
            # accidentally.
            site = grok.getSite()
            if site is not None:
                site.logger.warn(
                    'Empty Blob detected: %s' % blob_path)
            warnings.warn("EMPTY BLOB DETECTED: %s" % blob_path)
            fd.seek(0)  # make sure the complete file is written
            fd_refill = val.open('w')
            try:
                fd_refill.write(fd.read())
            finally:
                fd_refill.close()
            return key
        return None

    @property
    def curr_id(self):
        """The current basket id.

        The smallest positive integer, as a string, which is not yet
        in use as a key in the basket.

        Python integers do not overflow, so there is no upper limit
        on the number of entries; a new id is always found.
        """
        num = 1
        while str(num) in self.keys():
            num += 1
        return str(num)

    def storeFile(self, fd, filename):
        """Store the file in `fd` into the basket.

        The file will be stored in a Blob, unless a byte-wise equal
        file is already stored; in that case nothing new is stored.

        `fd`
          a seekable file object open for reading.

        `filename`
          currently unused by the basket.

        Returns the basket-internal id under which the file contents
        can be retrieved. `fd` is rewound before returning.

        Note that storing a new file commits the current transaction.
        """
        fd.seek(0)
        internal_id = self.getInternalId(fd)  # Moves file pointer!
        if internal_id is None:
            internal_id = self.curr_id
            fd.seek(0)
            self[internal_id] = Blob()
            transaction.commit()  # Urgently needed to make the Blob
                                  # persistent. Took me ages to find
                                  # out that solution, which makes some
                                  # design flaw in ZODB Blobs likely.
            blob_fd = self[internal_id].open('w')
            try:
                blob_fd.write(fd.read())
            finally:
                # Close explicitly instead of leaving the open write
                # handle to the garbage collector.
                blob_fd.close()
            self._p_changed = True
        fd.seek(0)
        return internal_id

    def retrieveFile(self, basket_id):
        """Retrieve a file open for reading with basket id `basket_id`.

        If there is no such id, ``None`` is returned. It is the
        callers responsibility to close the open file.
        """
        if basket_id in self.keys():
            return self[basket_id].open('r')
        return None


class ImageStorage(grok.Container):
    """A container for image files.

    Maps MD5 hexdigests to :class:`Basket` objects holding the files
    with that digest.
    """

    def _del(self):
        """Remove temporary blob files of all contained baskets.

        Calls `_del()` on each basket. This is best-effort cleanup:
        errors raised by a single basket are ignored so the remaining
        baskets are still processed.
        """
        for basket in self.values():
            try:
                basket._del()
            except Exception:
                # Deliberately ignored, see docstring.
                pass

    def storeFile(self, fd, filename):
        """Store the file in `fd` and return its file id.

        `fd`
          a seekable file object open for reading.

        `filename`
          passed on to the basket.

        The returned id is a string of form
        ``<md5 hexdigest>-<basket id>`` and can be passed to
        `retrieveFile`.
        """
        fd.seek(0)
        digest = md5digest(fd)
        fd.seek(0)
        if digest not in self.keys():
            self[digest] = Basket()
        basket_id = self[digest].storeFile(fd, filename)
        full_id = "%s-%s" % (digest, basket_id)
        return full_id

    def retrieveFile(self, file_id):
        """Retrieve the file stored under `file_id` open for reading.

        `file_id` is an id as returned by `storeFile`. If it contains
        no ``-`` or no file is stored under it, ``None`` is returned.

        It is the callers responsibility to close the open file.
        """
        if '-' not in file_id:
            return None
        digest, basket_id = file_id.split('-', 1)
        if digest not in self.keys():
            return None
        return self[digest].retrieveFile(basket_id)


class ImageStorageFileRetrieval(Persistent):
    """File retrieval that keeps file contents in an image storage.

    The image storage is looked up as item ``images`` of the current
    grok site. If there is no site or no such item, file data is
    handled in-place (no storage involved).
    """
    grok.implements(IFileRetrieval)

    def getImageStorage(self):
        """Get the image storage of the current site.

        Returns ``None`` if there is no current site or the site has
        no ``images`` item.
        """
        site = grok.getSite()
        if site is None:
            return None
        return site.get('images', None)

    def isImageStorageEnabled(self):
        """Tell whether an image storage is available.

        ``True`` if the current site provides an ``images`` item,
        ``False`` otherwise.
        """
        return self.getImageStorage() is not None

    def getFile(self, data):
        """Get a file object open for reading for `data`.

        If an image storage is available and `data` is the id of a
        file stored there, the stored file is returned. Otherwise
        `data` is taken as the file contents itself and returned
        wrapped in a `StringIO`.

        Raises `ValueError` if the image storage is reported enabled
        but cannot be found.
        """
        # ImageStorage is disabled, so give fall-back behaviour for
        # testing without ImageStorage
        if not self.isImageStorageEnabled():
            return StringIO(data)
        storage = self.getImageStorage()
        if storage is None:
            raise ValueError('Cannot find an image storage')
        result = storage.retrieveFile(data)
        if result is None:
            return StringIO(data)
        # Return the file already opened; retrieving it a second time
        # would leave this handle open and unused.
        return result

    def createFile(self, filename, f):
        """Create a `WAeUPImageFile` for the file `f`.

        `filename`
          name for the created file object.

        `f`
          a file object open for reading.

        If an image storage is available, the contents of `f` are
        stored there and the created file object carries the
        resulting file id as data. Otherwise the created file object
        carries the contents of `f` directly.

        Raises `ValueError` if the image storage is reported enabled
        but cannot be found.
        """
        if not self.isImageStorageEnabled():
            return WAeUPImageFile(filename, f.read())
        storage = self.getImageStorage()
        if storage is None:
            raise ValueError('Cannot find an image storage')
        file_id = storage.storeFile(f, filename)
        return WAeUPImageFile(filename, file_id)