source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 8420

Last change on this file since 8420 was 8370, checked in by uli, 13 years ago

Fix iface_names().

  • Property svn:keywords set to Id
File size: 18.4 KB
Line 
1## $Id: helpers.py 8370 2012-05-06 13:18:17Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""General helper functions for Kofa.
19"""
20import datetime
21import os
22import pytz
23import re
24import shutil
25import grok
26from cStringIO import StringIO
27from docutils.core import publish_string
28from zope.component import getUtility
29from zope.component.interfaces import IFactory
30from zope.interface import implementedBy
31from zope.interface.interface import Method, Attribute
32from zope.schema import getFieldNames
33from zope.schema.fieldproperty import FieldProperty
34from zope.security.interfaces import NoInteraction
35from zope.security.management import getInteraction
36from zope.pluggableauth.interfaces import IAuthenticatorPlugin
37
# Chunk size in bytes (8 KiB) used when reading files block-wise, see
# :func:`cmp_files`.
BUFSIZE = 8 * 1024
39
def remove_file_or_directory(filepath):
    """Remove a file, a symbolic link or a whole directory tree.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, their targets are left
    untouched. This includes dangling links and links pointing to
    directories (on which :func:`shutil.rmtree` refuses to operate).

    `filepath` might be a relative or absolute path. Nothing is
    returned.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` is, other than `exists`, also true for dangling symlinks.
    if not os.path.lexists(filepath):
        return
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        # Regular files and any kind of symlink: remove the entry only.
        os.unlink(filepath)
    return
55
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of the paths is not a directory.

    If `overwrite` is true, any same named objects in `dst` will be
    removed and replaced. Otherwise these files will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory. Items that were not copied stay in place.

    This function returns a list of non-copied files (names relative
    to `src`).

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any true value requests overwriting (as documented and
            # consistent with the handling of `del_old`).
            if not overwrite:
                not_copied.append(item)
                continue
            remove_file_or_directory(itemdst)

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            remove_file_or_directory(itemsrc)
    return not_copied
101
102
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    Please note that a form is only found if it is surrounded by
    other text on both sides, and that a body is only found if some
    text precedes the opening tag.
    """
    # Raw strings keep the backslash in the patterns without relying
    # on Python passing through unknown string escapes.
    form_match = re.match(
        r'^.+(<form[^\>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        # The form including its enclosing tags.
        return form_match.group(1)
    body_match = re.match(
        r'^.+<body[^\>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        # Only the contents of the body, without the tags.
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
147
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain unicode string. (A trailing comma here would silently
    # turn the title into a 1-tuple.)
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to create instances of. Must be set in derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object, i.e. the result
        of calling ``factory`` without any arguments.

        NOTE(review): positional and keyword arguments are accepted
        but not passed on to ``factory``.
        """
        return self.factory()

    def getInterfaces(self):
        """Get the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
243
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()

    def publish(overrides):
        # Every rendering uses the same writer and warning stream;
        # only the report/halt levels differ between the passes.
        settings = dict(overrides)
        settings['warning_stream'] = warning_stream
        return publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=settings)

    # First pass: report everything to collect the messages.
    document = publish({'report_level': 0})
    warning_stream.seek(0)
    collected = warning_stream.read()
    if collected:
        # Render again, this time with no warnings inline...
        document = publish({'report_level': 10000, 'halt_level': 10000})
    html = get_inner_HTML_part(document).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    # An empty message string is reported as `None`.
    return html, (collected or None)
334
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is a shortcut for :func:`ReST2HTML_w_warnings` that returns
    the HTML part only. Warnings about too short headings, etc. are
    silently dropped; call :func:`ReST2HTML_w_warnings` directly if
    you need them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # First item of the (html, warnings) pair is the rendered HTML.
    return ReST2HTML_w_warnings(source_string)[0]
380
def attrs_to_fields(cls):
    """Install a FieldProperty on `cls` for each schema field.

    The field names are taken from the first interface that
    `implementedBy` reports for `cls`; for each of them the
    respective class attribute is set to a `FieldProperty` built
    from the interface's field of that name.

    Returns the (modified) class. With Python >= 2.6 this function
    can therefore also be used as a class decorator.
    """
    declared = list(implementedBy(cls))
    schema = declared[0]
    for name in getFieldNames(schema):
        field_property = FieldProperty(schema[name])
        setattr(cls, name, field_property)
    return cls
390
def get_current_principal():
    """Get the 'current' principal.

    The principal is taken from the first participation of the
    current interaction, so no request is needed. Examining a request
    is nevertheless the regular (and recommended) way to find the
    principal involved 'currently'; use this function only if you
    really have no access to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. if there is no interaction or the interaction
    has no participations.
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction or no participations present
        return None
411
def cmp_files(file_descr1, file_descr2):
    """Compare the contents of two open files.

    Both file(-like) objects are rewound to their start and then read
    block-wise (`BUFSIZE` bytes at a time).

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    file_descr1.seek(0)
    file_descr2.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
426
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (integer or float). Returns a
    unicode string with a unit picked by magnitude: bytes as given,
    kilobytes as whole numbers (rounded down), megabytes and
    gigabytes with two decimal places.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(3 * 1024*1024 / 2)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024**2:
        # Floor division and `%d` give whole kilobytes for integers
        # and floats alike, independent from the division mode.
        return u'%d KB' % (number // 1024,)
    # Float divisors: integer input must not lose the fractional part.
    if number < 1024**3:
        return u'%.2f MB' % (number / 1024.0**2,)
    return u'%.2f GB' % (number / 1024.0**3,)
452
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    If the object provides a usable file descriptor (``fileno()``),
    the size is looked up via :func:`os.fstat`. Otherwise, also if
    ``fileno()`` exists but raises an error (as in-memory files from
    the :mod:`io` module do), we seek to the end of the file and
    return the position reached.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # No `fileno` at all or one that is not backed by a real file
        # descriptor: fall back to seeking below.
        pass
    else:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, 2) # seek to last position in file
    return file_like_obj.tell()
474
def get_user_account(request):
    """Return local user account.

    The account is looked up by the id of the principal of `request`
    in the authenticator plugin registered under the name ``users``.
    The result of the plugin's ``getAccount`` call is returned as-is.
    """
    current_id = request.principal.id
    users_plugin = getUtility(IAuthenticatorPlugin, name='users')
    return users_plugin.getAccount(current_id)
482
def iface_names(iface, omit=(), exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Searches also base interfaces (direct and indirect ones). Each
    interface is examined only once, even if it can be reached via
    several bases.

    Names of fields that are pure attributes
    (i.e. zope.interface.Attribute) or methods are excluded by
    default. Set `exclude_attribs` or `exclude_methods` to a false
    value to include them.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list (or any other container) of
    names to exclude.

    Returns an unsorted list of strings. The names of `iface` itself
    come first, followed by the names of its bases in breadth-first
    order. A name defined by several of the interfaces is listed
    once for each of them.
    """
    # Collect all interfaces (also bases), each one exactly once.
    collected = []
    seen = set((iface,))
    pending = [iface]
    while pending:
        current = pending.pop(0)
        collected.append(current)
        for base in current.getBases():
            if base not in seen:
                seen.add(base)
                pending.append(base)
    # Collect (filtered) names of collected interfaces
    result = []
    for current in collected:
        for name, descr in current.namesAndDescriptions():
            if name in omit:
                continue
            # Identity check on purpose: only descriptions that are
            # exactly `Attribute` instances are 'pure' attributes,
            # instances of derived classes are not filtered here.
            if exclude_attribs and descr.__class__ is Attribute:
                continue
            if exclude_methods and isinstance(descr, Method):
                continue
            result.append(name)
    return result
520
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value, filled when the value shows up.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        try:
            slot = preferred.index(code)
        except ValueError:
            # Not a preferred value: keep original order
            tail.append((title, code))
        else:
            head[slot] = (title, code)
    # Drop slots of preferred values we did not find in the input.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
562
def now(tz=None):
    """Get current datetime in timezone of `tz`.

    The current UTC time is determined and handed over to
    :func:`to_timezone` together with `tz`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
571
def to_timezone(dt, tz=None):
    """Shift datetime into timezone `tz`.

    A 'naive' datetime `dt` (one without `tzinfo`) is treated as UTC
    time.

    Without a `tz` the datetime is shifted to UTC.

    Values that are not `datetime.datetime` instances are handed back
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source timezone first, then again after the
    # shift into the target timezone.
    shifted = aware.tzinfo.normalize(aware).astimezone(target)
    return target.normalize(shifted)
Note: See TracBrowser for help on using the repository browser.