source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 11588

Last change on this file since 11588 was 11476, checked in by Henrik Bettermann, 11 years ago

Replace default FieldProperty docstring by field title.

  • Property svn:keywords set to Id
File size: 23.2 KB
Line 
1## $Id: helpers.py 11476 2014-03-07 09:32:43Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""General helper functions for Kofa.
19"""
20import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
21import datetime
22import imghdr
23import logging
24import os
25import pytz
26import re
27import shutil
28import tempfile
29import grok
30from cStringIO import StringIO
31from docutils.core import publish_string
32from zope.component import getUtility
33from zope.component.interfaces import IFactory
34from zope.interface import implementedBy
35from zope.interface.interface import Method, Attribute
36from zope.schema import getFieldNames
37from zope.schema.fieldproperty import FieldProperty
38from zope.security.interfaces import NoInteraction
39from zope.security.management import getInteraction
40from zope.pluggableauth.interfaces import IAuthenticatorPlugin
41
# Number of bytes read per chunk when files are processed blockwise
# (8 KiB), see `cmp_files`.
BUFSIZE = 1024 * 8
43
44
def remove_file_or_directory(filepath):
    """Remove a file or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, never the objects they
    point to. This covers dangling links as well as links to
    directories (on which :func:`shutil.rmtree` refuses to operate).

    Always returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    if not os.path.lexists(filepath):
        # Neither a real filesystem object nor a (maybe broken) link.
        return
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        # Regular files and any kind of symbolic link.
        os.unlink(filepath)
    return
60
61
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the contents of directory `src` into directory `dst`.

    Both paths must exist and must be directories, otherwise a
    `ValueError` is raised.

    Items already present in `dst` under the same name are replaced
    only if `overwrite` is ``True``. Otherwise they are left alone and
    their names are collected.

    If `del_old` is true, each item is removed from `src` after it
    was copied.

    Entries whose name starts with a dot (Unix hidden files and
    directories) are ignored completely.

    Returns the list of names of items that were not copied.
    """
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
        )
    for is_okay, path, message in preconditions:
        if not is_okay(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            # Hidden stuff is never copied.
            continue
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            # Make room for the new copy.
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copy = shutil.copytree
        else:
            copy = shutil.copy2
        copy(source_item, target_item)
        if del_old:
            remove_file_or_directory(source_item)
    return skipped
107
108
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    The searched tags may also be located at the very beginning or
    end of the snippet:

       >>> get_inner_HTML_part('<body>My Body</body>')
       'My Body'

       >>> get_inner_HTML_part('<form>My Form</form>Trailing Trash')
       '<form>My Form</form>'

    """
    # The leading `.*` is greedy on purpose: if several forms are
    # contained, the last opening form tag wins.
    form_match = re.match(
        r'^.*(<form[^>]*>.*</form>).*$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included
    body_match = re.match(
        r'^.*<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
153
154
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # `title` and `description` are plain unicode strings.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to create instances of. Must be set in derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but not passed
        on to ``factory``, which is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Get the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
250
251
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect warnings separately.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code (unicode) generated from `source_string`,
    ``<WARNINGS>`` is a string containing the warning messages issued
    while rendering, or ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline in the
    document but can be retrieved from the returned warnings, which is
    a string or ``None``.
    """
    stream = StringIO()
    overrides = {'report_level': 0, 'warning_stream': stream}
    document = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=overrides)
    stream.seek(0)
    warning_msgs = stream.read()
    if warning_msgs:
        # Render again, this time with no warnings inline...
        overrides = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            }
        document = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)
    else:
        warning_msgs = None
    html = get_inner_HTML_part(document).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
342
343
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings part of
    the result thrown away. Use that function directly if you are
    interested in warnings about too short headings, etc.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the (html, warnings) result is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
389
390
def attrs_to_fields(cls, omit=()):
    """Turn the attributes of a class into FieldProperty instances.

    With Python >= 2.6 we can even use this function as a class decorator.

    Only the first interface reported by `implementedBy` for `cls` is
    considered. For each schema field of that interface a
    `FieldProperty` is set on `cls` under the name of the field.

    `omit` is a sequence of field names that should _not_ be turned
    into field properties. This is useful for properties and the like.

    Returns `cls`. Raises `IndexError` if `cls` does not implement any
    interface.
    """
    iface = list(implementedBy(cls))[0]
    for field_name in getFieldNames(iface):
        if field_name in omit:
            continue
        field = iface[field_name]
        field_property = FieldProperty(field)
        # Set proper docstring for the API docs.
        field_property.__doc__ = field.title + ' (computed attribute)'
        setattr(cls, field_name, field_property)
    return cls
408
409
def get_current_principal():
    """Determine the 'current' principal without having a request.

    The regular (and recommended) way to find the principal involved
    'currently' is to examine the request. Use this function only if
    you really have no access to the current request.

    The principal of the first participation of the current
    interaction is returned. If there is no interaction or the
    interaction has no participations (for instance during tests),
    ``None`` is returned.
    """
    try:
        return getInteraction().participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present.
        return None
430
431
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have the same content.

    Both file-like objects are rewound first and then read in chunks
    of `BUFSIZE`, until a difference or the end of data is found.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both streams are exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
446
447
def string_from_bytes(number):
    """Turn a number into some textual representation.

    Values below 1024 are given in bytes, values below 1024**2 in
    full (rounded down) kilobytes. Larger values are given in
    megabytes or gigabytes with two decimal places, no matter whether
    `number` is an integer or a float.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1536 * 1024)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    elif number < 1024 ** 2:
        # Explicit floor division: number of complete kilobytes.
        return u'%d KB' % (number // 1024,)
    elif number < 1024 ** 3:
        # Force float division, otherwise integer input would lose
        # its fractional part with Python 2.
        return u'%.2f MB' % (float(number) / 1024 ** 2,)
    return u'%.2f GB' % (float(number) / 1024 ** 3,)
473
474
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    If the object provides a usable file descriptor, the size is taken
    from :func:`os.fstat`. Objects without ``fileno`` and objects whose
    ``fileno()`` is not operational (or does not denote a valid
    descriptor) are measured by seeking to their end instead.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        return os.fstat(file_like_obj.fileno()).st_size
    except (AttributeError, IOError, OSError, ValueError):
        # No (usable) file descriptor available: ask the object itself.
        pass
    file_like_obj.seek(0, os.SEEK_END)  # seek to last position in file
    return file_like_obj.tell()
496
497
def get_user_account(request):
    """Get the local user account of the principal of `request`.

    The account is looked up by the principal id in the authenticator
    plugin registered under the name ``users``. The result of the
    plugin's `getAccount()` is returned as-is.
    """
    user_id = request.principal.id
    return getUtility(IAuthenticatorPlugin, name='users').getAccount(user_id)
505
506
def iface_names(iface, omit=(), exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Searches also base interfaces.

    Names of fields that are pure attributes
    (i.e. zope.interface.Attribute) or methods are excluded by
    default.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a sequence of names to exclude.

    Returns an unsorted list of strings, each name contained once.
    """
    # Collect all interfaces (also bases) recursively
    all_ifaces = set()
    pending = [iface]
    while pending:
        current = pending.pop()
        if current in all_ifaces:
            continue
        all_ifaces.add(current)
        pending.extend(current.getBases())
    # Collect (filtered) names of collected interfaces
    result = []
    seen = set()  # names already put into `result`
    for current in all_ifaces:
        for name, descr in current.namesAndDescriptions():
            if name in omit:
                continue
            # Only exact `Attribute` instances are 'pure' attributes.
            # An `isinstance` check would match derived descriptions
            # as well.
            if exclude_attribs and descr.__class__ is Attribute:
                continue
            if exclude_methods and isinstance(descr, Method):
                continue
            if name in seen:
                continue
            seen.add(name)
            result.append(name)
    return result
546
547
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value, in the requested order. Slots of
    # values not found in the input stay `None` and are dropped below.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    return tuple([entry for entry in head if entry is not None] + tail)
589
590
def now(tz=None):
    """Return the current datetime shifted into timezone `tz`.

    The current UTC time is passed to :func:`to_timezone` together
    with `tz`. If `tz`, a `tzinfo` instance, is None, UTC time is
    returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
599
600
def to_timezone(dt, tz=None):
    """Move datetime `dt` into timezone `tz`.

    A 'naive' `dt` (one without `tzinfo`) is considered to be UTC.

    Without `tz` the value is shifted to UTC.

    Values that are not `datetime.datetime` instances are returned
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source timezone first, then in the target one.
    normalized = aware.tzinfo.normalize(aware)
    return target.normalize(normalized.astimezone(target))
619
620
def get_fileformat(path, bytestream=None):
    """Try to find out the file format of a given media file.

    The checks are delegated to the stdlib `imghdr` library, which
    looks at header data (Magic Words) and not at filename
    extensions or similar. They are not very thorough, though.

    If `bytestream` is not `None`, it is handed over to `imghdr`
    together with `path`; `imghdr` then examines the bytestream and
    ignores the `path`.

    Returns the filetype as string (something like ``'jpg'``) if the
    file format can be recognized, ``None`` else. ``None`` is also
    returned if neither `path` nor `bytestream` is given. The names
    ``'jpeg'`` and ``'tiff'`` are replaced by ``'jpg'`` and ``'tif'``
    respectively.

    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if bytestream is None:
        if path is None:
            return None
        kind = imghdr.what(path)
    else:
        kind = imghdr.what(path, bytestream)
    # Use the short names
    if kind == 'jpeg':
        kind = 'jpg'
    elif kind == 'tiff':
        kind = 'tif'
    return kind
652
653
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    Works as a test/plugin for the stdlib `imghdr` library.

    If `file` is not `None`, its first four bytes are examined (and
    the file is rewound afterwards); `bytestream` is ignored in this
    case. Otherwise the beginning of `bytestream` is examined.

    Returns ``'pdf'`` if the examined data starts with the PDF magic
    word ``%PDF``, ``None`` otherwise (also if there is no data at
    all).
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    if not bytestream:
        return None
    # Compare binary data with a binary marker and text with a text
    # marker, so that the check works for both kinds of header data.
    if isinstance(bytestream, (bytes, bytearray)):
        marker = b'%PDF'
    else:
        marker = u'%PDF'
    if bytestream.startswith(marker):
        return 'pdf'
    return None
667
# Register `check_pdf` as an additional header check function with
# `imghdr`, so that `imghdr.what()` consults it as well. The membership
# test keeps the function from being appended a second time.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
671
672
def _first_row_keys(path):
    """Return the keys of the first data row of the CSV file at `path`.

    An empty list is returned if the file provides no data row.
    """
    with open(path, 'rb') as fd:
        try:
            return list(next(csv.DictReader(fd)).keys())
        except StopIteration:
            return []


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.
    Only if merging fails with an exception, the temporary file is
    removed here before the exception is passed on.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files. The
    columns of the result file are the sorted union of the columns
    found in the first data row of each input file.

    Also broken/empty CSV files can be handled.

    All files opened here are closed again before returning.
    """
    # sniff the col names
    fieldnames = sorted(
        set(_first_row_keys(path1) + _first_row_keys(path2)))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    try:
        with os.fdopen(wp, 'wb') as result:
            writer = csv.DictWriter(result, fieldnames)
            writer.writerow(dict((x, x) for x in fieldnames))  # header
            for path in (path1, path2):
                with open(path, 'rb') as source:
                    for row in csv.DictReader(source):
                        writer.writerow(row)
    except Exception:
        # The caller never gets to know the path, so clean up here.
        os.unlink(tmp_path)
        raise
    return tmp_path
713
714
def product(sequence, start=1):
    """Multiply all items of `sequence` (numbers, _not_ strings).

    The multiplication starts with the value of `start` (1 by
    default), so the result is `start` times all items.

    As a special case an empty sequence gives 0, regardless of
    `start`.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
726
727
class NullHandler(logging.Handler):
    """A logging handler that swallows all records.

    Nothing is logged by this handler. Useful if you want to shut up
    a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Drop `record` without doing anything with it."""
        return None
737
738
def check_csv_charset(iterable):
    """Check contents of `iterable` regarding valid CSV encoding.

    `iterable` is expected to be an iterable on _rows_ (not
    chars). This is true for instance for
    filehandlers. `zope.publisher.browser.FileUpload` instances are
    _not_ iterable, unfortunately.

    Returns line num of first illegal char or ``None``. Line nums
    start counting with 1 (not zero).

    Other regular errors raised while reading rows are reported as
    well: in this case the value of the line counter plus one is
    returned. Exceptions not derived from `Exception` (like
    `KeyboardInterrupt` or `SystemExit`) are not caught.
    """
    linenum = 1
    reader = csv.DictReader(iterable)
    try:
        for _row in reader:
            linenum += 1
    except UnicodeDecodeError:
        return linenum
    except Exception:
        return linenum + 1
    return None
Note: See TracBrowser for help on using the repository browser.