source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 7194

Last change on this file since 7194 was 7186, checked in by Henrik Bettermann, 13 years ago

Rename functions according to the WAeUP style guide:

functions and methods with property decorator with underscore

methods with CamelCase

  • Property svn:keywords set to Id
File size: 14.1 KB
RevLine 
[4188]1"""General helper functions for WAeUP.
2"""
3import os
[4375]4import re
[4188]5import sys
6import shutil
[5731]7import grok
[5848]8from cStringIO import StringIO
9from docutils.core import publish_string
[7175]10from zope.component import getUtility
[5731]11from zope.component.interfaces import IFactory
[5734]12from zope.interface import implementedBy
[6071]13from zope.schema import getFieldNames
14from zope.schema.fieldproperty import FieldProperty
[6372]15from zope.security.interfaces import NoInteraction
16from zope.security.management import getInteraction
[7175]17from zope.pluggableauth.interfaces import IAuthenticatorPlugin
[4188]18
[6503]19BUFSIZE = 8 * 1024
[6372]20
def remove_file_or_directory(filepath):
    """Remove a file, a directory tree or a symbolic link.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are never followed: the link itself is removed,
    no matter whether it points to a file, to a directory or to
    nothing at all (dangling link).

    `filepath` may be relative; it is made absolute before use.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    # ``lexists`` is also true for dangling symlinks, for which
    # ``exists`` reports False.
    if not os.path.lexists(filepath):
        return
    # ``shutil.rmtree`` refuses to operate on symlinks, so links to
    # directories must be unlinked like regular files.
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
36
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of them is not a directory.

    If `overwrite` is true, any same named objects in `dst` will be
    removed and replaced. Otherwise these objects will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory.

    This function returns a list of names of non-copied items, i.e.
    items that already existed in `dst` and were not overwritten.

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any true value enables overwriting, as documented (not
            # only the ``True`` singleton).
            if not overwrite:
                not_copied.append(item)
                continue
            remove_file_or_directory(itemdst)

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            remove_file_or_directory(itemsrc)
    return not_copied
[4375]82
83
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    This also works if the snippet starts right with the body tag:

       >>> get_inner_HTML_part('<body>My Body</body>')
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # The leading ``.*`` is greedy, so with several opening tags the
    # last one that is still followed by a closing tag wins.
    form_match = re.match(
        r'^.*(<form[^>]*>.*</form>).*$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included, look for the contents of <body>
    body_match = re.match(
        r'^.*<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
128
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.sirp.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.sirp.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # Must be a plain string. A trailing comma here would silently
    # turn the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class (or other callable) to instantiate; set in subclasses.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but not passed
        on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.
        """
        # Required by IFactory
        return implementedBy(self.factory)
[5848]224
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect docutils warnings.

    The result is a tuple ``(<HTML_CODE>, <WARNINGS>)``.
    ``<HTML_CODE>`` is the generated HTML as a unicode string,
    ``<WARNINGS>`` is a string with the warning messages emitted
    during rendering or ``None`` if there were none.

    A well-formed multi-line ReStructuredText string is simply turned
    into HTML code:

        >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Nothing went wrong here, so `warnings` is ``None``:

        >>> warnings is None
        True

    Markup problems are reported in the second item of the returned
    tuple. Let's render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    The warnings do not show up inside the HTML code; they are only
    available via the returned warnings value, which is a string or
    ``None``.
    """
    html_writer = 'html4css1'
    warning_stream = StringIO()
    # First pass: report everything into our own stream.
    verbose_settings = dict(report_level=0, warning_stream=warning_stream)
    fulldoc = publish_string(
        source_string, writer_name=html_writer,
        settings_overrides=verbose_settings)
    warning_stream.seek(0)
    warning_msgs = warning_stream.read() or None
    if warning_msgs is not None:
        # Second pass: render once more with reporting effectively
        # switched off to get HTML without inline system messages.
        quiet_settings = dict(
            report_level=10000, halt_level=10000,
            warning_stream=warning_stream)
        fulldoc = publish_string(
            source_string, writer_name=html_writer,
            settings_overrides=quiet_settings)
    result = get_inner_HTML_part(fulldoc).strip()
    if isinstance(result, unicode):
        return result, warning_msgs
    return result.decode('utf-8'), warning_msgs
[5848]315
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML.

    Warnings about markup problems (too short headings, etc.) are
    dropped silently. If you are interested in them, use
    :func:`ReST2HTML_w_warnings` instead.

    The HTML code is returned as a unicode string.

    This is how a regular document gets rendered:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    The result for a document with markup problems (here: the
    underline is too short) looks similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part is of interest here, warnings are discarded.
    html_code, _discarded_warnings = ReST2HTML_w_warnings(source_string)
    return html_code
[6071]361
def attrs_to_fields(cls):
    """Install a FieldProperty on `cls` for each schema field.

    Only the first interface reported by ``implementedBy(cls)`` is
    considered. For every field name of that interface a
    :class:`FieldProperty` is set as class attribute of `cls`.

    Returns `cls`, so with Python >= 2.6 this function can also be
    used as a class decorator.
    """
    interfaces = list(implementedBy(cls))
    schema = interfaces[0]
    for name in getFieldNames(schema):
        field_property = FieldProperty(schema[name])
        setattr(cls, name, field_property)
    return cls
[6372]371
def get_current_principal():
    """Return the principal of the current interaction.

    No request is needed for this lookup. Nevertheless, asking the
    request is the regular (and recommended) way to find out which
    principal is involved 'currently'; call this function only if
    you really have no access to the current request.

    ``None`` is returned if there is no interaction or if the
    interaction has no participations (for instance during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # Either no interaction at all or no participations present
        return None
[6503]392
def cmp_files(file_descr1, file_descr2):
    """Tell whether two file-like objects have equal contents.

    Both objects are rewound to their start and then read chunk-wise
    (``BUFSIZE`` bytes per read) until a difference or the end is found.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both streams are exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
[7078]407
def string_from_bytes(number):
    """Turn a number of bytes into some textual representation.

    Values below 1024 are given in bytes, values below 1024**2 in
    full kilobytes (rounded down) and anything else in megabytes or
    gigabytes with two decimal places.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1572864)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Floor division + ``%d`` give whole kilobytes for ints and
        # floats alike, with or without true division.
        return u'%d KB' % (number // 1024,)
    # Force float division: with integer input, integer division
    # would cut off the fraction before it gets formatted.
    if number < 1024 ** 3:
        return u'%.2f MB' % (float(number) / 1024 ** 2,)
    return u'%.2f GB' % (float(number) / 1024 ** 3,)
[7079]433
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    For objects backed by a real file descriptor the size is taken
    from :func:`os.fstat`. All other objects, including in-memory
    objects whose ``fileno()`` exists but is not usable (like those
    of the :mod:`io` module), are measured by seeking to their end.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, EnvironmentError, ValueError):
        # Either there is no ``fileno`` at all or it is unsupported:
        # ``io.UnsupportedOperation`` derives from both ``ValueError``
        # and the I/O error family.
        descriptor = None
    if descriptor is not None:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, os.SEEK_END) # seek to last position in file
    return file_like_obj.tell()
[7175]455
def get_user_account(request):
    """Return the local user account of the request's principal.

    The account is looked up by the id of ``request.principal`` in
    the authenticator plugin registered under the name ``users``.
    """
    principal_id = request.principal.id
    users_plugin = getUtility(IAuthenticatorPlugin, name='users')
    return users_plugin.getAccount(principal_id)
463
Note: See TracBrowser for help on using the repository browser.