source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 7101

Last change on this file since 7101 was 7079, checked in by uli, 13 years ago

Add helper to determine size of files reliably and for all regular
kinds of file/file-like objects.

File size: 13.7 KB
Line 
1"""General helper functions for WAeUP.
2"""
3import os
4import re
5import sys
6import shutil
7import grok
8from cStringIO import StringIO
9from docutils.core import publish_string
10from zope.component.interfaces import IFactory
11from zope.interface import implementedBy
12from zope.schema import getFieldNames
13from zope.schema.fieldproperty import FieldProperty
14from zope.security.interfaces import NoInteraction
15from zope.security.management import getInteraction
16
# Number of bytes requested per ``read()`` call when files are
# compared chunk-wise (see ``cmp_files``).
BUFSIZE = 8 * 1024
18
def removeFileOrDirectory(filepath):
    """Remove a file, a symbolic link or a directory tree.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves; their targets are never
    touched. This also holds for dangling links and for links that
    point to directories (on which :func:`shutil.rmtree` would refuse
    to operate).

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` (instead of `exists`) also reports dangling symlinks.
    if not os.path.lexists(filepath):
        return
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        # A real directory: remove it including all of its contents.
        shutil.rmtree(filepath)
    else:
        # Regular files and any kind of symlink are simply unlinked.
        os.unlink(filepath)
    return
34
def copyFileSystemTree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of the paths is not a directory.

    If `overwrite` is true, any same named objects in `dst` will be
    removed and replaced. Otherwise these objects will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory.

    This function returns a list of names (relative to `src`) of the
    items that were not copied because they already existed in `dst`.

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any true value enables overwriting (same convention as
            # used for `del_old` below).
            if overwrite:
                removeFileOrDirectory(itemdst)
            else:
                not_copied.append(item)
                continue

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            removeFileOrDirectory(itemsrc)
    return not_copied
80
81
def getInnerHTMLPart(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> getInnerHTMLPart(doc)
       '<html>without body nor form</html>'

    Please note that a form is only detected if at least one character
    precedes and follows it, and a body only if at least one character
    precedes it. As the leading part of both patterns matches
    greedily, the last ``<form>`` (or ``<body>``) start tag that still
    yields a match is picked.
    """
    # Raw strings keep the regular expressions free of invalid string
    # escapes; the compiled patterns are the same as before.
    form_match = re.match(
        r'^.+(<form[^\>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included
    body_match = re.match(
        r'^.+<body[^\>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
126
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.sirp.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.sirp.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # Must be a plain unicode string; a trailing comma here would
    # silently turn the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to instantiate; derived classes have to set this.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but not passed
        on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.
        """
        # Required by IFactory
        return implementedBy(self.factory)
222
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    verbose_settings = {
        'report_level': 0,
        'warning_stream': warning_stream,
        }
    # First pass: render with the lowest report level and collect
    # whatever gets written to our private warning stream.
    html_doc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)
    warning_stream.seek(0)
    warning_msgs = warning_stream.read()
    if warning_msgs:
        # Second pass: render again, this time with report and halt
        # levels raised so that no warnings end up inline...
        quiet_settings = dict(
            verbose_settings, report_level=10000, halt_level=10000)
        html_doc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=quiet_settings)
    inner_html = getInnerHTMLPart(html_doc).strip()
    if not isinstance(inner_html, unicode):
        inner_html = inner_html.decode('utf-8')
    # An empty message string is reported as `None`.
    return inner_html, (warning_msgs or None)
313
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings part of the
    result thrown away: warnings about too short headings, etc. are
    silently discarded. Use :func:`ReST2HTML_w_warnings` directly if
    you are interested in them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part (first item) of the result is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
359
def attrs_to_fields(cls):
    """Install a FieldProperty on `cls` for each schema field.

    The field names are taken from the first interface reported by
    ``implementedBy(cls)``; other interfaces are not considered.

    The (modified) class is returned, so with Python >= 2.6 this
    function can also be used as a class decorator.
    """
    schema = list(implementedBy(cls))[0]
    for name in getFieldNames(schema):
        field_property = FieldProperty(schema[name])
        setattr(cls, name, field_property)
    return cls
369
def get_current_principal():
    """Get the 'current' principal.

    The principal is taken from the first participation of the
    current interaction, so no request is needed. Examining a request
    is nevertheless the regular (and recommended) way to find the
    principal involved 'currently'; use this function only if you
    really have no access to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. if there is no interaction or the interaction
    has no participations.
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # Either no interaction at all or no participations present.
        return None
390
def cmp_files(file_descr1, file_descr2):
    """Compare the contents of two open files (or file-like objects).

    Both objects are rewound to their beginning and then read
    alternately in chunks of ``BUFSIZE``.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    chunk = file_descr1.read(BUFSIZE)
    while chunk == file_descr2.read(BUFSIZE):
        if not chunk:
            # Both sides delivered an empty chunk: nothing left to read.
            return True
        chunk = file_descr1.read(BUFSIZE)
    # The most recently read chunks differ.
    return False
405
def string_from_bytes(number):
    """Turn a number of bytes into some textual representation.

    `number` may be an integer or a float. Values below 1024 are
    given in bytes, values below 1024**2 in whole kilobytes (rounded
    down) and bigger values in megabytes or gigabytes with two
    decimal places.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1572864)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    elif number < 1024**2:
        # Explicit floor division: whole kilobytes for ints and floats.
        return u'%d KB' % (number // 1024,)
    elif number < 1024**3:
        # Force float division; integer input would otherwise lose its
        # fractional part before being formatted.
        return u'%.2f MB' % (float(number) / 1024**2,)
    return u'%.2f GB' % (float(number) / 1024**3,)
431
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    If the object provides a usable ``fileno()``, the size is asked
    from the operating system. Otherwise (no ``fileno`` at all or one
    that is not backed by a real file descriptor, like with
    ``io.BytesIO``) we seek to the end of the file and report the
    position found there.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    fileno = getattr(file_like_obj, 'fileno', None)
    if fileno is not None:
        try:
            return os.fstat(fileno()).st_size
        except (IOError, OSError, ValueError):
            # `fileno()` exists but there is no real descriptor behind
            # it (`io.UnsupportedOperation` derives from both IOError
            # and ValueError). Use the generic way below instead.
            pass
    file_like_obj.seek(0, 2) # seek to last position in file
    return file_like_obj.tell()
Note: See TracBrowser for help on using the repository browser.