source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 9674

Last change on this file since 9674 was 9593, checked in by uli, 12 years ago

Add a logging NullHandler for Python < 2.7 compatibility.

  • Property svn:keywords set to Id
File size: 22.2 KB
Line 
1## $Id: helpers.py 9593 2012-11-09 15:35:08Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""General helper functions for Kofa.
19"""
20import csv
21import datetime
22import imghdr
23import logging
24import os
25import pytz
26import re
27import shutil
28import tempfile
29import grok
30from cStringIO import StringIO
31from docutils.core import publish_string
32from zope.component import getUtility
33from zope.component.interfaces import IFactory
34from zope.interface import implementedBy
35from zope.interface.interface import Method, Attribute
36from zope.schema import getFieldNames
37from zope.schema.fieldproperty import FieldProperty
38from zope.security.interfaces import NoInteraction
39from zope.security.management import getInteraction
40from zope.pluggableauth.interfaces import IAuthenticatorPlugin
41from waeup.kofa.interfaces import MessageFactory as _
42
# Chunk size in bytes (8 KiB) used by `cmp_files` when reading files.
BUFSIZE = 8 * 1024
44
def remove_file_or_directory(filepath):
    """Remove a file, a directory tree or a symbolic link.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves; the link target is never
    touched. This also covers dangling links (whose target does not
    exist) and links pointing to directories, on which
    :func:`shutil.rmtree` would raise an error.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    if os.path.islink(filepath):
        # Must be checked first: `isdir` follows links and `exists`
        # is false for dangling links.
        os.unlink(filepath)
    elif os.path.isdir(filepath):
        shutil.rmtree(filepath)
    elif os.path.exists(filepath):
        os.unlink(filepath)
    return
60
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the entries of directory `src` into directory `dst`.

    `src` and `dst` must both be existing directories, otherwise a
    :exc:`ValueError` is raised.

    Entries already present in `dst` under the same name are replaced
    only if `overwrite` is ``True``; otherwise they are left alone and
    their names are reported in the returned list.

    If `del_old` is true, each entry is removed from `src` after it
    was copied.

    Entries whose name starts with a dot (Unix hidden files and
    directories) are ignored completely.

    Returns the list of names that were not copied.
    """
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
        )
    for predicate, path, message in preconditions:
        if not predicate(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            # Hidden entries are never copied.
            continue
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)

        if del_old:
            remove_file_or_directory(source_item)
    return skipped
106
107
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    Please note that a form is only recognized if at least one
    character precedes and follows it, and a body only if at least one
    character precedes it (as is the case in complete HTML documents).
    """
    # Raw strings: the patterns must not rely on Python's handling of
    # unknown backslash escapes in ordinary string literals.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
152
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # `title` must be a plain string. Beware of trailing commas here:
    # they silently turn the value into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to instantiate; must be set by derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns a new instance of the class stored in ``factory``.

        Positional and keyword arguments are accepted but currently
        not passed on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
248
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect the warnings.

    The result is a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>``
    is the (unicode) HTML generated from `source_string`,
    ``<WARNINGS>`` is a string with all warning messages emitted
    while rendering, or ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    # First pass: report everything into our own stream.
    verbose_settings = {
        'report_level': 0,
        'warning_stream': warning_stream,
        }
    fulldoc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)
    warning_stream.seek(0)
    warning_msgs = warning_stream.read()
    if warning_msgs:
        # Second pass: render again, this time with levels set so
        # high that no warnings end up inline in the document.
        quiet_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': warning_stream,
            }
        fulldoc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=quiet_settings)
    else:
        warning_msgs = None
    html = get_inner_HTML_part(fulldoc).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
339
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into (unicode) HTML.

    Warnings (about too short heading underlines, etc.) are silently
    dropped. If you need them, use :func:`ReST2HTML_w_warnings`
    instead.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    rendered, _discarded_warnings = ReST2HTML_w_warnings(source_string)
    return rendered
385
def attrs_to_fields(cls):
    """Install a FieldProperty on `cls` for each schema field.

    The field names are taken from the first interface found in
    ``implementedBy(cls)``. For every field name of that interface a
    :class:`FieldProperty` is set as class attribute of same name.

    Returns `cls`, so that with Python >= 2.6 this function can also
    be used as a class decorator.
    """
    first_iface = list(implementedBy(cls))[0]
    for name in getFieldNames(first_iface):
        schema_field = first_iface[name]
        setattr(cls, name, FieldProperty(schema_field))
    return cls
395
def get_current_principal():
    """Return the principal of the current interaction.

    This works without a request at hand. Nevertheless, examining a
    request is the regular (and recommended) way to find out which
    principal is involved 'currently'; use this function only if you
    really have no access to the current request.

    Returns ``None`` if there is no interaction or the interaction
    has no participations (for instance during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present
        return None
416
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have the same content.

    Both file objects are rewound to their start and then read in
    chunks of `BUFSIZE`.

    Returns ``True`` if all chunks are equal, ``False`` as soon as
    two chunks differ.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files exhausted without any difference found.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
431
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (integer or float). Values below
    1024 are given as bytes, values below 1024**2 as whole kilobytes
    (rounded down), everything else as mega- or gigabytes with two
    decimal places.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1572864)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    elif number < 1024**2:
        # Explicit floor division: same result for ints and floats,
        # with or without true division.
        return u'%d KB' % (number // 1024,)
    elif number < 1024**3:
        # Force float division, otherwise integer input loses its
        # fractional part under Python 2.
        return u'%.2f MB' % (float(number) / 1024**2,)
    return u'%.2f GB' % (float(number) / 1024**3,)
457
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    If the object provides a usable ``fileno()``, the size is taken
    from :func:`os.fstat`. Objects without ``fileno`` or with a
    ``fileno()`` that raises (like in-memory files from the `io`
    module) are measured by seeking to their end.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # No real OS-level file behind this object.
        descriptor = None
    if descriptor is not None:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, 2) # seek to last position in file
    return file_like_obj.tell()
479
def get_user_account(request):
    """Return the local user account of the request's principal.

    The account is looked up by the id of ``request.principal`` in
    the authenticator plugin registered under the name ``'users'``.
    """
    account_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(account_id)
487
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Collect the attribute names of an interface and all its bases.

    By default names of pure attributes (i.e. instances of exactly
    zope.interface.Attribute) and names of methods are left out.
    Names of typical fields derived from zope.schema are included.

    Names listed in `omit` are left out as well.

    Returns an unsorted list of strings without duplicates.
    """
    # Walk up the inheritance graph and collect every interface once.
    collected = set()
    pending = [iface]
    while pending:
        current = pending.pop()
        if current in collected:
            continue
        collected.add(current)
        pending.extend(current.getBases())
    # Gather the (filtered) names of all interfaces found.
    names = []
    for candidate in collected:
        for name, descr in candidate.namesAndDescriptions():
            skip = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                or name in names
                )
            if not skip:
                names.append(name)
    return names
527
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    `preferred_list` can be any iterable of tokens.

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred token; slots of tokens never seen stay
    # `None` and are dropped at the end.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        try:
            index = preferred.index(code)
        except ValueError:
            # Not a preferred token: keep original order
            tail.append((title, code))
        else:
            head[index] = (title, code)
    return tuple([entry for entry in head if entry is not None] + tail)
569
def now(tz=None):
    """Return the current datetime, shifted into timezone `tz`.

    `tz` should be a timezone as defined in pytz (a `tzinfo`
    instance). If it is ``None``, the time is returned in UTC.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
578
def to_timezone(dt, tz=None):
    """Convert datetime `dt` into timezone `tz`.

    A 'naive' `dt` (one without `tzinfo`) is considered to be UTC.

    Without `tz` the datetime is shifted to UTC.

    Values that are no `datetime.datetime` instances are returned
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target_tz = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    normalized = aware.tzinfo.normalize(aware)
    return target_tz.normalize(normalized.astimezone(target_tz))
597
def get_fileformat(path, bytestream=None):
    """Try to find out the file format of a media file.

    The format is not guessed from the filename extension or similar.
    Instead the header data is checked against commonly known rules
    (Magic Words), although these checks are not very thorough.

    If `bytestream` is not ``None``, it is examined and `path` is
    ignored.

    Returns the filetype as string (something like ``'jpg'``) or
    ``None`` if the format could not be recognized or neither `path`
    nor `bytestream` was given.

    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.

    More filetypes (though untested in waeup.kofa) are recognized
    automatically as detection is delegated to the stdlib `imghdr`
    library. See that module's docs for a complete list of filetypes
    recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Map `imghdr` names to the shorter names used by us.
    if detected == 'jpeg':
        return 'jpg'
    if detected == 'tiff':
        return 'tif'
    return detected
629
def check_pdf(bytestream, file):
    """Check whether a file or bytestream contains PDF data.

    If `file` is not ``None``, its first four bytes are examined
    (the file is rewound to its start before and after reading) and
    `bytestream` is ignored.

    Returns ``'pdf'`` if the data starts with ``%PDF``, ``None``
    otherwise.

    Works as a test/plugin for the stdlib `imghdr` library.
    """
    header = bytestream
    if file is not None:
        file.seek(0)
        header = file.read(4)
        file.seek(0)

    if not header.startswith('%PDF'):
        return None
    return 'pdf'
643
# Register `check_pdf` as an additional header check function with the
# stdlib `imghdr` module, so that `imghdr.what` (and therefore
# `get_fileformat`) also runs it. The membership check prevents the
# function from being appended more than once.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
647
def _csv_open_options(mode):
    """Return ``(mode, kwargs)`` for opening CSV files with `mode`.

    The `csv` module of Python 2 expects binary files, the one of
    Python 3 text files opened with ``newline=''``.
    """
    import sys
    if sys.version_info[0] < 3:
        return mode + 'b', {}
    return mode, {'newline': ''}

def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.
    If merging fails, the temporary file is removed before the
    exception is passed on.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files. The
    columns of the result are the sorted column names found in the
    first data row of each file.

    Also broken/empty CSV files can be handled: files without any
    data row contribute neither columns nor rows.

    All files opened here are closed again before returning.
    """
    read_mode, read_kw = _csv_open_options('r')
    write_mode, write_kw = _csv_open_options('w')
    with open(path1, read_mode, **read_kw) as fd1:
        with open(path2, read_mode, **read_kw) as fd2:
            readers = [csv.DictReader(fd1), csv.DictReader(fd2)]
            # sniff the col names from the first data row of each
            # file; `None` marks a file without any data row
            first_rows = [next(reader, None) for reader in readers]
            colnames = set()
            for first_row in first_rows:
                if first_row is not None:
                    colnames.update(first_row.keys())
            fieldnames = sorted(colnames)
            # now write the real data
            wp, tmp_path = tempfile.mkstemp()
            try:
                with os.fdopen(wp, write_mode, **write_kw) as out:
                    writer = csv.DictWriter(out, fieldnames)
                    writer.writerow(
                        dict((x, x) for x in fieldnames)) # header
                    for reader, first_row in zip(readers, first_rows):
                        if first_row is None:
                            continue
                        # the first row was already consumed above
                        writer.writerow(first_row)
                        for row in reader:
                            writer.writerow(row)
            except Exception:
                # do not leave a half-written temp file nobody
                # knows the path of
                os.unlink(tmp_path)
                raise
    return tmp_path
688
def product(sequence, start=1):
    """Multiply all numbers (_not_ strings) of `sequence`.

    The multiplication starts with `start` (1 by default), so the
    result is `start` multiplied by each item of the sequence.

    For an empty sequence 0 is returned.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
700
class NullHandler(logging.Handler):
    """A logging handler that discards every record.

    Attach it to a logger if you want to shut that logger up.

    Provided here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Ignore `record`; nothing is written anywhere."""
        return None
Note: See TracBrowser for help on using the repository browser.