source: main/waeup.sirp/branches/ulif-extimgstore/src/waeup/sirp/imagestorage.py @ 7004

Last change on this file since 7004 was 7002, checked in by uli, 14 years ago

Throw in the complete mess of last 2 weeks. External file storage now works basically (tests pass), although there are lots of things still to remove, finetune, document, etc.

File size: 13.8 KB
Line 
1##
2## imagestorage.py
3## Login : <uli@pu.smp.net>
4## Started on  Mon Jul  4 16:02:14 2011 Uli Fouquet
5## $Id$
6##
7## Copyright (C) 2011 Uli Fouquet
8## This program is free software; you can redistribute it and/or modify
9## it under the terms of the GNU General Public License as published by
10## the Free Software Foundation; either version 2 of the License, or
11## (at your option) any later version.
12##
13## This program is distributed in the hope that it will be useful,
14## but WITHOUT ANY WARRANTY; without even the implied warranty of
15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16## GNU General Public License for more details.
17##
18## You should have received a copy of the GNU General Public License
19## along with this program; if not, write to the Free Software
20## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21##
22"""A storage for image files.
23"""
24import grok
25import hashlib
26import os
27import tempfile
28import transaction
29import warnings
30from StringIO import StringIO
31from ZODB.blob import Blob
32from persistent import Persistent
33from hurry.file import HurryFile
34from hurry.file.interfaces import IFileRetrieval
35from zope.component import queryUtility
36from zope.interface import Interface
37from waeup.sirp.image import WAeUPImageFile
38from waeup.sirp.interfaces import (
39    IFileStoreNameChooser, IExtFileStore, IFileStoreHandler,)
40from waeup.sirp.utils.helpers import cmp_files
41
def md5digest(fd, chunk_size=65536):
    """Get an MD5 hexdigest for the file stored in `fd`.

    `fd`
      a file object open for reading. Reading starts at the current
      file position and continues until EOF; the file pointer is not
      rewound afterwards.

    `chunk_size`
      number of bytes requested per read. The file is hashed chunk by
      chunk so that large files are never held in memory completely.

    Returns the digest as a string of hexadecimal digits.
    """
    digest = hashlib.md5()
    while True:
        chunk = fd.read(chunk_size)
        if not chunk:
            # An empty read signals EOF.
            break
        digest.update(chunk)
    return digest.hexdigest()
50
class FileStoreNameChooser(grok.Adapter):
    """Fallback file store name chooser usable for any context.

    It accepts every name and always proposes the same constant name.
    """
    grok.context(Interface)
    grok.implements(IFileStoreNameChooser)

    def checkName(self, name):
        """Check whether `name` is a valid object name.

        This implementation performs no check at all: every name is
        accepted and nothing is raised.
        """
        return None

    def chooseName(self, name):
        """Choose a name for the object.

        The passed-in `name` is ignored; the constant
        ``u'unknown_file'`` is returned in any case, so this method
        never raises an error.
        """
        fallback_name = u'unknown_file'
        return fallback_name
72
class Basket(grok.Container):
    """A basket holds a set of image files with same hash.

    Files are kept as ZODB Blobs under string keys ``'1'``, ``'2'``,
    etc. (the 'basket ids').
    """

    def _writeBlob(self, blob, fd):
        """Write the remaining contents of `fd` into `blob`.

        The blob file opened for writing is closed in any case, so
        that no open file handles leak.
        """
        out = blob.open('w')
        try:
            out.write(fd.read())
        finally:
            out.close()

    def _del(self):
        """Remove temporary files associated with local blobs.

        A basket holds files as Blob objects. Unfortunately, if a
        basket was not committed (put into ZODB), those blobs linger
        around as real files in some temporary directory and won't be
        removed.

        This is a helper function to remove all those uncommitted
        blobs that has to be called explicitly, for instance in tests.
        """
        # Work on a snapshot of the keys: entries are deleted from the
        # container while we loop.
        for key in list(self.keys()):
            item = self[key]
            if getattr(item, '_p_oid', None):
                # Don't mess around with blobs in ZODB
                continue
            fd = item.open('r')
            try:
                name = getattr(fd, 'name', None)
            finally:
                fd.close()
            if name is not None and os.path.exists(name):
                os.unlink(name)
            del self[key]
        return

    def getInternalId(self, fd):
        """Get the basket-internal id for the file stored in `fd`.

        `fd` must be a file open for reading. If an (byte-wise) equal
        file can be found in the basket, its internal id (basket id)
        is returned, ``None`` otherwise.

        If a stored blob turns out to be empty, it is refilled with
        the contents of `fd` and its id is returned.

        The file pointer of `fd` is moved by this method.
        """
        for key, val in self.items():
            # Rewind for every candidate: comparing with a previous
            # candidate moved the file pointer.
            fd.seek(0)
            fd_stored = val.open('r')
            try:
                stored_name = fd_stored.name
                # Index 6 of a stat result is the file size.
                is_empty = os.stat(stored_name)[6] == 0
                is_equal = False
                if not is_empty:
                    fd_stored.seek(0)
                    is_equal = cmp_files(fd, fd_stored)
            finally:
                fd_stored.close()
            if is_empty:
                # Nasty workaround. Blobs seem to suffer from being emptied
                # accidentally.
                site = grok.getSite()
                if site is not None:
                    site.logger.warn(
                        'Empty Blob detected: %s' % stored_name)
                warnings.warn("EMPTY BLOB DETECTED: %s" % stored_name)
                self._writeBlob(val, fd)
                return key
            if is_equal:
                return key
        return None

    @property
    def curr_id(self):
        """The current basket id.

        The string representation of the lowest positive integer
        which is not yet in use as a key of this basket.

        Python integers do not overflow, so there is always a free
        id; no 'basket full' error is raised.
        """
        num = 1
        while str(num) in self:
            num += 1
        return str(num)

    def storeFile(self, fd, filename):
        """Store the file in `fd` into the basket.

        The file will be stored in a Blob, unless a byte-wise equal
        file is already stored. In both cases the basket id of the
        stored file is returned.

        `filename` is currently not used.
        """
        fd.seek(0)
        internal_id = self.getInternalId(fd) # Moves file pointer!
        if internal_id is None:
            internal_id = self.curr_id
            fd.seek(0)
            self[internal_id] = Blob()
            transaction.commit() # Urgently needed to make the Blob
                                 # persistent. Took me ages to find
                                 # out that solution, which makes some
                                 # design flaw in ZODB Blobs likely.
            self._writeBlob(self[internal_id], fd)
            fd.seek(0)
            self._p_changed = True
        return internal_id

    def retrieveFile(self, basket_id):
        """Retrieve a file open for reading with basket id `basket_id`.

        If there is no such id, ``None`` is returned. It is the
        callers responsibility to close the open file.
        """
        if basket_id in self:
            return self[basket_id].open('r')
        return None
179
class ImageStorage(grok.Container):
    """A container for image files.

    Files are sorted into baskets; each basket is stored under the
    MD5 hexdigest of the files it contains. Files are addressed by
    ids of form ``<digest>-<basket_id>``.
    """
    def _del(self):
        """Remove uncommitted blob files of all baskets.

        This is a best-effort cleanup: a basket that fails to clean
        up is reported by a warning and does not stop the cleanup of
        the remaining baskets.
        """
        for basket in self.values():
            try:
                basket._del()
            except Exception:
                # Do not swallow KeyboardInterrupt/SystemExit, but keep
                # going for ordinary errors.
                warnings.warn(
                    "Could not clean up basket: %s" % (
                        getattr(basket, '__name__', None),))

    def storeFile(self, fd, filename):
        """Store the file in `fd` and return its storage id.

        `fd` must be a seekable file open for reading. The basket for
        the file's MD5 digest is created if it does not exist yet.

        Returns a string of form ``<digest>-<basket_id>``.
        """
        fd.seek(0)
        digest = md5digest(fd)
        fd.seek(0)
        if digest not in self:
            self[digest] = Basket()
        basket_id = self[digest].storeFile(fd, filename)
        return "%s-%s" % (digest, basket_id)

    def retrieveFile(self, file_id):
        """Get the file stored under `file_id` open for reading.

        `file_id` is expected to be of form ``<digest>-<basket_id>``.
        If it contains no dash or there is no basket for the digest,
        ``None`` is returned. Otherwise the result of the basket
        lookup is returned.
        """
        if '-' not in file_id:
            return None
        digest, basket_id = file_id.split('-', 1)
        if digest not in self:
            return None
        return self[digest].retrieveFile(basket_id)
207
class ImageStorageFileRetrieval(Persistent):
    """File retrieval that keeps file contents in a site's image storage.

    The image storage is looked up as ``'images'`` in the current
    site. If there is no site or no such entry, data is handled
    in-memory as a fallback.
    """
    grok.implements(IFileRetrieval)

    def getImageStorage(self):
        """Get the image storage of the current site.

        Returns ``None`` if there is no current site or the site
        contains no ``'images'`` entry.
        """
        site = grok.getSite()
        if site is None:
            return None
        return site.get('images', None)

    def isImageStorageEnabled(self):
        """Tell whether an image storage is available."""
        return self.getImageStorage() is not None

    def getFile(self, data):
        """Get a file-like object for `data`.

        If an image storage is available and `data` is the id of a
        file stored therein, that file is returned open for reading.
        In all other cases `data` itself is regarded as file contents
        and returned wrapped in a ``StringIO``.
        """
        storage = self.getImageStorage()
        if storage is None:
            # ImageStorage is disabled, so give fall-back behaviour for
            # testing without ImageStorage
            return StringIO(data)
        result = storage.retrieveFile(data)
        if result is None:
            return StringIO(data)
        # Return the file opened above; do not open it a second time,
        # as the first file handle would never be closed.
        return result

    def createFile(self, filename, f):
        """Create a `WAeUPImageFile` for the file `f` named `filename`.

        With an image storage available, `f` is stored there and the
        returned image file carries the storage id as data. Otherwise
        the returned image file carries the contents read from `f`.
        """
        storage = self.getImageStorage()
        if storage is None:
            return WAeUPImageFile(filename, f.read())
        file_id = storage.storeFile(f, filename)
        return WAeUPImageFile(filename, file_id)
246
247
class ExtFileStore(object): #grok.GlobalUtility):
    """External file store.

    External file stores are meant to store files 'externally' of the
    ZODB, i.e. in filesystem.

    Most important attribute of the external file store is the `root`
    path which gives the path to the location where files will be
    stored within.

    By default `root` is a ``'media/'`` directory in the root of the
    datacenter root of a site.

    The `root` attribute is 'read-only' because you normally don't
    want to change this path -- it is dynamic. That means, if you call
    the file store from 'within' a site, the root path will be located
    inside this site (a :class:`waeup.sirp.University` instance). If
    you call it from 'outside' a site some temporary dir (always the
    same during lifetime of the file store instance) will be used. The
    term 'temporary' tells what you can expect from this path
    persistence-wise.

    If you insist, you can pass a root path on initialization to the
    constructor but when calling from within a site afterwards, the
    site will override your setting for security measures. This way
    you can safely use one file store for different sites in a Zope
    instance simultaneously and files from one site won't show up in
    another.

    An ExtFileStore instance is available as a global utility
    implementing :iface:`waeup.sirp.interfaces.IExtFileStore`.

    To add and retrieve files from the storage, use the appropriate
    methods below.
    """

    grok.implements(IExtFileStore)

    # Root dir used when called from outside of a site. Set by the
    # constructor or lazily by the `root` property.
    _root = None

    @property
    def root(self):
        """Root dir of this storage.

        The root dir is a readonly value determined dynamically. It
        holds media files for sites or other components.

        If a site is available we return a ``media/`` dir in the
        datacenter storage dir.

        Otherwise we create a temporary dir which will be remembered
        on next call.

        If a site exists and has a datacenter, it has always
        precedence over temporary dirs, also after a temporary
        directory was created.

        Please note that retrieving `root` is expensive. You might
        want to store a copy once retrieved in order to minimize the
        number of calls to `root`.

        """
        site = grok.getSite()
        if site is not None:
            return os.path.join(site['datacenter'].storage, 'media')
        if self._root is None:
            self._root = tempfile.mkdtemp()
        return self._root

    def __init__(self, root=None):
        """Create a file store.

        `root` is the root dir used when the store is called from
        outside of a site. If it is ``None``, a temporary directory
        is created on first use.
        """
        self._root = root
        return

    def getFile(self, file_id):
        """Get a file stored under file ID `file_id`.

        The path of the file is determined by the file store handler
        registered under the marker contained in `file_id`; if there
        is none, a `DefaultFileStoreHandler` is used.

        Returns the file opened for binary reading. It is the callers
        responsibility to close it. If the file cannot be found
        ``None`` is returned.
        """
        marker = self.extractMarker(file_id)[0]
        handler = queryUtility(IFileStoreHandler, name=marker,
                               default=DefaultFileStoreHandler())
        path = handler.pathFromFileID(self, self.root, file_id)
        # Check for a regular file: directories (for instance the
        # root dir itself) cannot be opened for reading.
        if not os.path.isfile(path):
            return None
        return open(path, 'rb')

    def createFile(self, filename, f):
        """Store a file.

        `filename` serves as file id and may start with a marker (see
        `extractMarker`). The handler registered under that marker
        (or a `DefaultFileStoreHandler`) decides about the path to
        store the file in and the file object to return. Missing
        directories are created.

        `f` is a file open for reading.

        Returns the file object created by the handler.
        """
        file_id = filename
        root = self.root # Calls to self.root are expensive
        marker, filename, base, ext = self.extractMarker(file_id)
        handler = queryUtility(IFileStoreHandler, name=marker,
                               default=DefaultFileStoreHandler())
        f, path, file_obj = handler.createFile(
            self, root, file_id, filename, f)
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname, 0o755)
        # Make sure the written file is flushed and closed.
        with open(path, 'wb') as out:
            out.write(f.read())
        return file_obj

    def extractMarker(self, file_id):
        """split filename into marker, filename, basename, and extension.

        A marker is a leading part of a string of form
        ``__MARKERNAME__`` followed by the real filename. This way we
        can put markers into a filename to request special processing.

        Returns a quadruple

          ``(marker, filename, basename, extension)``

        where ``marker`` is the marker in lowercase, filename is the
        complete trailing real filename, ``basename`` is the basename
        of the filename and ``extension`` the filename extension of
        the trailing filename. See examples below.

        Example:

           >>> extractMarker('__MaRkEr__sample.jpg')
           ('marker', 'sample.jpg', 'sample', '.jpg')

        If no marker is contained, we assume the whole string to be a
        real filename:

           >>> extractMarker('no-marker.txt')
           ('', 'no-marker.txt', 'no-marker', '.txt')

        Filenames without extension give an empty extension string:

           >>> extractMarker('no-marker')
           ('', 'no-marker', 'no-marker', '')

        Empty or non-string values give a quadruple of empty strings.

        """
        if not isinstance(file_id, basestring) or not file_id:
            return ('', '', '', '')
        # A marker is present only if the string *starts* with '__'
        # and contains a second '__': '__MARKER__rest' splits into
        # ['', 'MARKER', 'rest'].
        parts = file_id.split('__', 2)
        marker = ''
        if len(parts) == 3 and parts[0] == '':
            marker = parts[1].lower()
            file_id = parts[2]
        basename, ext = os.path.splitext(file_id)
        return (marker, file_id, basename, ext)

# Make an ExtFileStore instance available as global utility.
grok.global_utility(ExtFileStore, provides=IExtFileStore)
396
class DefaultStorage(ExtFileStore):
    """An external file store declared to provide `IFileRetrieval`."""
    grok.provides(IFileRetrieval)

# Register a DefaultStorage as global utility for IFileRetrieval.
grok.global_utility(DefaultStorage, provides=IFileRetrieval)
401
class DefaultFileStoreHandler(grok.GlobalUtility):
    """File store handler used if no handler is registered for a marker.

    Files are located directly below the root dir of the store.
    """
    grok.implements(IFileStoreHandler)

    def pathFromFileID(self, store, root, file_id):
        """Get the path for `file_id`: `file_id` joined to `root`.

        `store` is not used by this handler.
        """
        return os.path.join(root, file_id)

    def createFile(self, store, root, filename, file_id, f):
        """Prepare storing the file `f`.

        Returns a triple ``(f, path, file_obj)`` where `f` is the
        file passed in (unchanged), `path` the path computed by
        `pathFromFileID` for `file_id` and `file_obj` a
        `WAeUPImageFile` created from `filename` and `file_id`.

        This method does not write anything to disk.

        NOTE(review): `ExtFileStore.createFile` passes its arguments
        positionally in order ``(store, root, file_id, filename, f)``,
        i.e. with `file_id` and `filename` swapped compared to this
        signature -- confirm against `IFileStoreHandler` which order
        is intended.
        """
        path = self.pathFromFileID(store, root, file_id)
        return f, path, WAeUPImageFile(filename, file_id)
Note: See TracBrowser for help on using the repository browser.