source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 9002

Last change on this file since 9002 was 8739, checked in by Henrik Bettermann, 13 years ago

Improve logging (part 1).

  • Property svn:keywords set to Id
File size: 22.0 KB
Line 
1## $Id: helpers.py 8739 2012-06-17 12:13:45Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""General helper functions for Kofa.
19"""
20import csv
21import datetime
22import imghdr
23import os
24import pytz
25import re
26import shutil
27import tempfile
28import grok
29from cStringIO import StringIO
30from docutils.core import publish_string
31from zope.component import getUtility
32from zope.component.interfaces import IFactory
33from zope.interface import implementedBy
34from zope.interface.interface import Method, Attribute
35from zope.schema import getFieldNames
36from zope.schema.fieldproperty import FieldProperty
37from zope.security.interfaces import NoInteraction
38from zope.security.management import getInteraction
39from zope.pluggableauth.interfaces import IAuthenticatorPlugin
40from waeup.kofa.interfaces import MessageFactory as _
41
# Chunk size in bytes (8 KiB) used when reading files block-wise,
# for instance by `cmp_files`.
BUFSIZE = 8 * 1024
43
def remove_file_or_directory(filepath):
    """Remove a file or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, their targets are left
    untouched. This includes dangling links and links pointing to
    directories.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    if os.path.islink(filepath):
        # Links need special care: `os.path.exists` is false for
        # dangling links (they would be left behind silently) and
        # `shutil.rmtree` refuses to work on links to directories.
        os.unlink(filepath)
        return
    if not os.path.exists(filepath):
        return
    if os.path.isdir(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
59
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of the paths is not a directory.

    If `overwrite` is true, any same named objects will be
    overwritten. Otherwise these files will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory.

    This function returns a list of non-copied files (names relative
    to `src`).

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue  # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any true value enables overwriting (as documented), not
            # only the `True` singleton.
            if overwrite:
                remove_file_or_directory(itemdst)
            else:
                not_copied.append(item)
                continue

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            remove_file_or_directory(itemsrc)
    return not_copied
105
106
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    Please note that the form must be preceded and followed by at
    least one character and the body must be preceded by at least one
    character to be recognized (the input is expected to be a
    *complete* document).
    """
    # A form takes precedence over the body. The form is returned
    # including its enclosing tags.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included: look for the contents of <body>.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
151
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # Plain unicode string. (A trailing comma here would silently turn
    # the title into a 1-tuple.)
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class (or any other callable) to create instances from. Must
    # be set by derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but not passed
        on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
247
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were no warnings.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    # First pass: report everything and collect the messages in our
    # own stream.
    verbose_settings = {
        'report_level': 0,
        'warning_stream': warning_stream,
        }
    html_doc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)
    warning_stream.seek(0)
    messages = warning_stream.read()
    if messages:
        # Second pass: render again, this time with thresholds high
        # enough to get no warnings inline...
        silent_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': warning_stream,
            }
        html_doc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=silent_settings)
    else:
        # Nothing was reported: signal that with `None`.
        messages = None
    inner_html = get_inner_HTML_part(html_doc).strip()
    if not isinstance(inner_html, unicode):
        inner_html = inner_html.decode('utf-8')
    return inner_html, messages
338
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings part of
    the result dropped: any warnings about too short headings,
    etc. are silently discarded. Use :func:`ReST2HTML_w_warnings` if
    you want to get any warnings.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # First item is the HTML code, second the (ignored) warnings.
    return ReST2HTML_w_warnings(source_string)[0]
384
def attrs_to_fields(cls):
    """Turn the attributes of a class into FieldProperty instances.

    For each schema field name of the first interface implemented by
    `cls` a :class:`FieldProperty` is set as class attribute of the
    same name.

    Returns the (modified) class, so with Python >= 2.6 we can even
    use this function as a class decorator.
    """
    implemented = list(implementedBy(cls))
    schema = implemented[0]  # only the first interface is considered
    for name in getFieldNames(schema):
        setattr(cls, name, FieldProperty(schema[name]))
    return cls
394
def get_current_principal():
    """Get the 'current' principal.

    The principal is taken from the first participation of the
    current interaction, so this method works without a
    request. Examining a request is the regular (and recommended) way
    to get a principal involved 'currently'.

    Use this method only if you really have no access to the current
    request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. if there is no interaction or the interaction
    has no participations.
    """
    try:
        return getInteraction().participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present
        return None
415
def cmp_files(file_descr1, file_descr2):
    """Compare two files by their file descriptors.

    Both files are rewound and then read chunk-wise (`BUFSIZE` bytes
    at a time) until a difference or the end of data is found.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for descr in (file_descr1, file_descr2):
        descr.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files exhausted without any difference
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
430
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (integer or float). Values below
    1024 are given in bytes, kilobytes are given as whole numbers
    (rounded down), megabytes and gigabytes with two decimal places.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1536 * 1024)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Whole kilobytes only, also for float input.
        return u'%d KB' % (number // 1024,)
    # Divide by floats: plain integer division would cut off the
    # fractional part of integer input before formatting.
    if number < 1024 ** 3:
        return u'%.2f MB' % (number / float(1024 ** 2),)
    return u'%.2f GB' % (number / float(1024 ** 3),)
456
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    If the passed object provides a working ``fileno()``, the size is
    taken from the file status. Otherwise (no ``fileno`` at all or one
    that is not operational, as with :class:`io.BytesIO`) we seek to
    the end and ask for the position.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    fileno = getattr(file_like_obj, 'fileno', None)
    if fileno is not None:
        try:
            return os.fstat(fileno()).st_size
        except (IOError, OSError, ValueError):
            # In-memory 'files' might provide `fileno` but raise when
            # it is called. Use the seek/tell approach below instead.
            pass
    file_like_obj.seek(0, 2) # seek to last position in file
    return file_like_obj.tell()
478
def get_user_account(request):
    """Return local user account.

    The account is looked up by the id of the request's principal in
    the authenticator plugin registered under name ``'users'``.
    """
    user_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(user_id)
486
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Base interfaces of `iface` are searched as well.

    By default names of pure attributes (i.e. instances of exactly
    zope.interface.Attribute) and names of methods are left out. Set
    `exclude_attribs` or `exclude_methods` to false to get them,
    too.

    Names of typical fields derived from zope.schema are included.

    Names listed in `omit` are excluded.

    Returns an unsorted list of strings.
    """
    # Collect all interfaces (also bases): extend the set by the
    # bases of all members until it does not grow any more.
    known = set((iface,))
    while True:
        grown = set(known)
        for candidate in known:
            grown.update(candidate.getBases())
        if grown == known:
            # No new interfaces found, collection complete
            break
        known = grown
    # Collect (filtered) names of collected interfaces
    names = []
    for candidate in known:
        for name, descr in candidate.namesAndDescriptions():
            if name in omit:
                continue
            if ((exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))):
                continue
            names.append(name)
    return names
524
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in orginal order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values not available in the tuples iterable are simply
    skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['x', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value, filled when the value is met.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    # Slots of preferred values we did not find are still `None`.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
566
def now(tz=None):
    """Get current datetime in timezone of `tz`.

    The current UTC time is determined and shifted into `tz` by
    :func:`to_timezone`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
575
def to_timezone(dt, tz=None):
    """Shift datetime into timezone `tz`.

    A datetime `dt` without `tzinfo` (i.e. a 'naive' one) is
    assumed to be UTC.

    Without `tz` the datetime is shifted to UTC.

    Values that are not a datetime.datetime are returned unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source timezone first, then in the target one.
    normalized = aware.tzinfo.normalize(aware)
    return target.normalize(normalized.astimezone(target))
594
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    Although checks done here are not done very thoroughly, they make
    no assumptions about the filetype by looking at its filename
    extension or similar. Instead they check header data to comply
    with common known rules (Magic Words).

    If bytestream is not `None` the `path` is ignored.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else. Also ``None`` is
    returned if neither `path` nor `bytestream` are given.

    The names ``'jpeg'`` and ``'tiff'`` delivered by `imghdr` are
    replaced by ``'jpg'`` and ``'tif'`` respectively.

    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Use the shorter, more common names for some formats.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
626
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    If `file` is given, its first four bytes are examined (and the
    file is rewound afterwards), otherwise `bytestream` is examined.

    Returns ``'pdf'`` if the data starts with the PDF marker, ``None``
    otherwise.

    Works as a test/plugin for the stdlib `imghdr` library.
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    return 'pdf' if bytestream.startswith('%PDF') else None
640
# Register `check_pdf` as header check function with `imghdr`, so
# that `imghdr.what()` (and therefore `get_fileformat`) can report
# 'pdf'. The membership check avoids registering the function twice.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
644
def _csv_open_args(mode):
    """Get ``(args, kwargs)`` to open a file for use with :mod:`csv`.

    `mode` is ``'r'`` or ``'w'``. The csv module of Python 2 requires
    files opened in binary mode, while Python 3 wants text files
    opened with ``newline=''``.
    """
    if str is bytes:  # Python 2
        return (mode + 'b',), {}
    return (mode,), {'newline': ''}

def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files. The
    columns of the result file are the (sorted) columns found in the
    first data row of each input file.

    Also broken/empty CSV files can be handled.

    All files opened here are closed (and the result file therefore
    completely written) when this function returns.
    """
    read_args, read_kw = _csv_open_args('r')
    write_args, write_kw = _csv_open_args('w')
    with open(path1, *read_args, **read_kw) as fp1:
        with open(path2, *read_args, **read_kw) as fp2:
            readers = (csv.DictReader(fp1), csv.DictReader(fp2))
            # sniff the col names from the first data row of each file
            first_rows = []
            for reader in readers:
                try:
                    first_rows.append(next(reader))
                except StopIteration:
                    # no data rows in this file
                    first_rows.append(None)
            fieldnames = sorted(set(
                key for row in first_rows if row is not None
                for key in row))
            # now write the real data
            wp, tmp_path = tempfile.mkstemp()
            with os.fdopen(wp, *write_args, **write_kw) as result_file:
                writer = csv.DictWriter(result_file, fieldnames)
                writer.writerow(dict((x, x) for x in fieldnames)) # header
                for first_row, reader in zip(first_rows, readers):
                    if first_row is None:
                        continue
                    # The first row was already consumed when sniffing.
                    writer.writerow(first_row)
                    for row in reader:
                        writer.writerow(row)
    return tmp_path
685
# Save function used for save methods in pages
def msave(view, **data):
    """Apply form `data` to the context of `view` and report success.

    The data is applied via ``view.applyData``, a message is flashed
    on the view and, if any fields were changed, their names are
    written to the logger of the current site.
    """
    changed_fields = view.applyData(view.context, **data)
    if changed_fields:
        # Turn list of lists into single list
        changed_fields = [
            name
            for names in changed_fields.values()
            for name in names]
    fields_string = ' + '.join(changed_fields)
    view.flash(_('Form has been saved.'))
    ob_class = view.__implemented__.__name__.replace('waeup.kofa.','')
    if not fields_string:
        return
    grok.getSite().logger.info('%s - saved: %s' % (ob_class, fields_string))
    return
Note: See TracBrowser for help on using the repository browser.