source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 15595

Last change on this file since 15595 was 15595, checked in by uli, 5 years ago

Add a function to extract key/value pairs from

HTML forms.

  • Property svn:keywords set to Id
File size: 28.1 KB
Line 
1## $Id: helpers.py 15595 2019-09-19 23:38:52Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""General helper functions for Kofa.
19"""
20import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
21import datetime
22import imghdr
23import logging
24import os
25import pytz
26import re
27import shutil
28import tempfile
29import grok
30from cStringIO import StringIO
31from docutils.core import publish_string
32from HTMLParser import HTMLParser
33from zope.component import getUtility
34from zope.component.interfaces import IFactory
35from zope.interface import implementedBy
36from zope.interface.interface import Method, Attribute
37from zope.schema import getFieldNames
38from zope.schema.fieldproperty import FieldProperty
39from zope.security.interfaces import NoInteraction
40from zope.security.management import getInteraction
41from zope.pluggableauth.interfaces import IAuthenticatorPlugin
42from zope.formlib.widget import renderElement
43
#: Number of bytes read per chunk when :func:`cmp_files` compares two files.
BUFSIZE = 8 * 1024
45
46
def remove_file_or_directory(filepath):
    """Remove a file, a symbolic link or a whole directory tree.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, their targets are left
    alone. This includes links pointing to directories (which
    :func:`shutil.rmtree` refuses to handle) and dangling links.

    `filepath` is the path to remove; relative paths are resolved
    against the current working directory.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` does not follow links, so dangling symlinks are found too.
    if not os.path.lexists(filepath):
        return
    # `isdir` follows links; a link to a directory must be unlinked,
    # not handed to `rmtree`.
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
62
63
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the entries of directory `src` into directory `dst`.

    Both paths must exist and must be directories, otherwise a
    :exc:`ValueError` is raised.

    Entries that already exist in `dst` are replaced only if
    `overwrite` is ``True``. Otherwise they are left untouched and
    their names are reported in the returned list.

    If `del_old` is true, every entry that was copied is removed from
    `src` afterwards.

    Entries whose name starts with a dot (Unix hidden files and
    directories) are skipped completely.

    Returns the list of names that were not copied because they
    already existed in `dst`.
    """
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
        )
    for is_fine, path, complaint in preconditions:
        if not is_fine(path):
            raise ValueError(complaint % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            # Hidden stuff is never copied.
            continue
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            # Make room for the fresh copy.
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)

        if del_old:
            remove_file_or_directory(source_item)
    return skipped
109
110
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    The form or body may also sit at the very beginning or the very
    end of the snippet:

       >>> get_inner_HTML_part('<form>My Form</form>Trailing Trash')
       '<form>My Form</form>'

       >>> get_inner_HTML_part('<body>My Body</body>')
       'My Body'

    """
    # The leading `.*` is greedy, so (as before) the last `<form` or
    # `<body` opening tag in the document is the one that gets picked.
    form = re.match(
        r'^.*(<form[^>]*>.*</form>).*$', html_code, re.DOTALL)
    if form is not None:
        return form.group(1)
    body = re.match(
        r'^.*<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body is not None:
        return body.group(1)
    # Neither <form> nor <body> found.
    return html_code
155
156
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain unicode string. A trailing comma here would silently
    # turn the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but not passed
        on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
252
253
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect docutils warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the inner part (see :func:`get_inner_HTML_part`) of the HTML
    document generated from `source_string`, as unicode.
    ``<WARNINGS>`` is a string with all warning messages, or ``None``
    if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    # First pass: report everything into our own stream.
    verbose_settings = {
        'report_level': 0,
        'warning_stream': warning_stream,
        }
    html_doc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)
    warning_stream.seek(0)
    collected = warning_stream.read()
    if collected:
        # Second pass: render again, this time without any warnings
        # showing up inline in the generated document.
        quiet_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': warning_stream,
            }
        html_doc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=quiet_settings)
    warning_msgs = None if collected == '' else collected
    inner = get_inner_HTML_part(html_doc).strip()
    if isinstance(inner, unicode):
        return inner, warning_msgs
    return inner.decode('utf-8'), warning_msgs
344
345
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML, dropping warnings.

    This is :func:`ReST2HTML_w_warnings` with the warnings part of
    the result thrown away. Use that function directly if you are
    interested in messages about too short headings, etc.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    rendered = ReST2HTML_w_warnings(source_string)
    # `rendered` is the pair (html, warnings); only the HTML is wanted.
    return rendered[0]
391
392
def attrs_to_fields(cls, omit=[]):
    """Turn schema fields of a class' interface into field properties.

    The first interface implemented by `cls` is examined. For each of
    its schema field names a :class:`FieldProperty` bound to that
    field is set as class attribute of the same name.

    `omit` is a list of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.

    Returns `cls`, so with Python >= 2.6 this function can also be
    used as a class decorator.
    """
    schema = list(implementedBy(cls))[0]
    wanted = [name for name in getFieldNames(schema) if name not in omit]
    for name in wanted:
        field = schema[name]
        prop = FieldProperty(field)
        # Set proper docstring for the API docs.
        prop.__doc__ = field.title + ' (computed attribute)'
        setattr(cls, name, prop)
    return cls
412
413
def get_current_principal():
    """Return the principal of the current interaction, if any.

    The principal is taken from the first participation of the
    current security interaction, so no request is needed. Looking
    into a request remains the regular (and recommended) way to
    find out about the principal involved; use this function only if
    you really have no access to the current request.

    Returns ``None`` when there is no interaction or when the
    interaction has no participations (for instance during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # Either no interaction at all or no participations present.
        return None
434
435
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have identical contents.

    Both file objects are rewound to their beginning and then read
    in chunks of ``BUFSIZE``; the chunks are compared pairwise.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both streams are exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
450
451
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (integer or float). Values below
    1 KB are given as-is, KB values are rounded down to full KB, MB
    and GB values are given with two decimal places.

      Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1572864)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Explicit floor division: full KB regardless of how `/` is
        # defined for the passed in number type.
        return u'%d KB' % (number // 1024,)
    # Divide by floats, so integer input is not truncated before
    # formatting.
    if number < 1024 ** 3:
        return u'%.2f MB' % (number / 1024.0 ** 2,)
    return u'%.2f GB' % (number / 1024.0 ** 3,)
477
478
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Objects that provide a working ``fileno()`` are asked via
    :func:`os.fstat`. Objects without ``fileno``, or whose
    ``fileno()`` fails (like :class:`io.BytesIO`), are measured by
    seeking to their end.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        fileno = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # No `fileno` at all, or an in-memory stream that only
        # pretends to have one (io.UnsupportedOperation).
        fileno = None
    if fileno is not None:
        return os.fstat(fileno).st_size
    file_like_obj.seek(0, 2)  # seek to last position in file
    return file_like_obj.tell()
500
501
def get_user_account(request):
    """Return the local user account of the request's principal.

    The account is looked up by principal id in the authenticator
    plugin registered under the name ``users``.
    """
    uid = request.principal.id
    return getUtility(IAuthenticatorPlugin, name='users').getAccount(uid)
509
510
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Collect the attribute names of an interface and all its bases.

    Base interfaces are followed recursively.

    By default names whose description is a plain
    ``zope.interface.Attribute`` (exactly that class, not a subclass)
    or a method are left out. Set `exclude_attribs` or
    `exclude_methods` to ``False`` to get them as well.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings; each name appears only once.
    """
    known = set([iface])
    # Grow the set of interfaces by their bases until nothing new
    # shows up any more.
    while True:
        grown = set(known)
        for candidate in known:
            grown = grown.union(candidate.getBases())
        if grown == known:
            break
        known = grown
    # Collect (filtered) names of all interfaces found.
    names = []
    for candidate in known:
        for name, descr in candidate.namesAndDescriptions():
            unwanted = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                or name in names
                )
            if not unwanted:
                names.append(name)
    return names
550
551
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the input are simply
    ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    # One slot per preferred value; slots of values that never show
    # up stay `None` and are dropped at the end.
    slots = [None for x in preferred_list]
    rest = []
    for title, code in tuples_iterable:
        if code in preferred_list:
            slots[preferred_list.index(code)] = (title, code)
        else:
            rest.append((title, code))
    found = [entry for entry in slots if entry is not None]
    return tuple(found + rest)
593
594
def now(tz=None):
    """Return the current datetime, expressed in timezone `tz`.

    The current UTC time is taken and handed over to
    :func:`to_timezone`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
603
604
def to_timezone(dt, tz=None):
    """Convert datetime `dt` into timezone `tz`.

    A 'naive' `dt` (one without `tzinfo`) is taken as UTC time.

    `tz` defaults to UTC if not given.

    Values that are not :class:`datetime.datetime` instances are
    handed back unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source zone first, then shift and normalize
    # again in the target zone.
    shifted = aware.tzinfo.normalize(aware).astimezone(target)
    return target.normalize(shifted)
623
624
def imghdr_test_fpm(h, f):
    """Header test for the `fpm` fileformat.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`.

    `h` is the header data to examine, `f` is not used.

    Returns ``'fpm'`` if `h` starts with ``FP1``, ``None`` otherwise.
    """
    # Slicing a too short header gives a too short result, so no
    # separate length check is needed.
    return 'fpm' if h[:3] == 'FP1' else None
633
634
#: Add test function in stdlib's imghdr tests. Guarded in the same way
#: as the `check_pdf` registration, so the function is never appended
#: twice.
if imghdr_test_fpm not in imghdr.tests:
    imghdr.tests.append(imghdr_test_fpm)
637
638
def get_fileformat(path, bytestream=None):
    """Try to find out the file format of a media file.

    The filename extension is not considered. Instead the header
    data is checked by the stdlib `imghdr` library (Magic Words).
    These checks are not very thorough.

    If bytestream is not `None` it is examined instead of the file
    at `path`.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else. The `imghdr` names
    ``'jpeg'`` and ``'tiff'`` are reported as ``'jpg'`` and ``'tif'``.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Map imghdr names to the names used here.
    aliases = {'jpeg': 'jpg', 'tiff': 'tif'}
    return aliases.get(detected, detected)
671
672
def check_pdf(bytestream, file):
    """Check whether a file or bytestream contains PDF data.

    Works as a test/plugin for the stdlib `imghdr` library.

    If `file` is not ``None``, `bytestream` is ignored and the first
    four bytes of `file` are examined instead. The file is rewound to
    its beginning before and after reading.

    Returns ``'pdf'`` if the examined data starts with ``%PDF``,
    ``None`` otherwise.
    """
    header = bytestream
    if file is not None:
        file.seek(0)
        header = file.read(4)
        file.seek(0)

    if not header.startswith('%PDF'):
        return None
    return 'pdf'
686
# Register check_pdf as header check function with `imghdr`. The
# membership test keeps us from appending the same function twice.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
690
691
def _first_row_keys(path):
    """Return the column names of the first data row of a CSV file.

    Returns an empty list if the file contains no data row.
    """
    with open(path, 'rb') as fd:
        try:
            return list(next(csv.DictReader(fd)).keys())
        except StopIteration:
            return []


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.
    If merging fails, the temporary file is removed before the
    exception is passed on.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.

    All files opened here are closed again before returning.
    """
    # sniff the col names
    fieldnames = sorted(
        set(_first_row_keys(path1) + _first_row_keys(path2)))
    # now read/write the real data
    handle, tmp_path = tempfile.mkstemp()
    try:
        with os.fdopen(handle, 'wb') as out:
            writer = csv.DictWriter(out, fieldnames)
            writer.writerow(dict((x, x) for x in fieldnames))  # header
            for path in (path1, path2):
                with open(path, 'rb') as fd:
                    for row in csv.DictReader(fd):
                        writer.writerow(row)
    except Exception:
        # The caller never learns about `tmp_path` in this case, so
        # we have to clean up ourselves.
        os.unlink(tmp_path)
        raise
    return tmp_path
732
733
def product(sequence, start=1):
    """Multiply all numbers (_not_ strings) of `sequence`.

    The product is additionally multiplied by `start`, which
    defaults to 1.

    Returns 0 for an empty sequence, regardless of `start`.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
745
746
class NullHandler(logging.Handler):
    """A logging handler that swallows every record.

    Nothing is logged. Attach it to a logger you want to shut up.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        # Deliberately do nothing with the record.
        return None
756
757
def check_csv_charset(iterable):
    """Check contents of `iterable` regarding valid CSV encoding and
    trailing whitespaces in data.

    `iterable` is expected to be an iterable on _rows_ (not
    chars). This is true for instance for
    filehandlers. `zope.publisher.browser.FileUpload` instances are
    _not_ iterable, unfortunately.

    Returns line num of first illegal char or ``None``. Line nums
    start counting with 1 (not zero). Returns -1 if data contain
    trailing whitespaces, i.e. if any value ends with a space.

    Any other error raised while reading the rows is reported as
    a line number as well. Only `SystemExit`, `KeyboardInterrupt`
    and the like are passed on to the caller.
    """
    linenum = 1
    try:
        reader = csv.DictReader(iterable)
        for row in reader:
            linenum += 1
            for value in row.values():
                if value.endswith(' '):
                    return -1
    except UnicodeDecodeError:
        return linenum
    except Exception:
        # Malformed data, including rows with missing or surplus
        # columns, whose values are not strings.
        return linenum + 1
    return None
784
785
class MemInfo(dict):
    """A dict with access to its items like if they are attributes.

    Reading or deleting an attribute that is not a key raises
    :exc:`AttributeError` (not :exc:`KeyError`), so that
    ``getattr(obj, name, default)`` and friends work as expected.
    """
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name)
792
793
def get_meminfo(src="/proc/meminfo"):
    """Get local memory info as provided in /proc/meminfo.

    Entries in /proc/meminfo are available as MemInfo attributes.
    The values are integers; a trailing `` kB`` unit is stripped.

    By default we lookup a file /proc/meminfo. Another path can be
    passed in as `src` parameter. In this case `src` must be a
    regular file and contain meminfo-style data.

    Lines that contain no colon (for instance empty lines) are
    skipped.

    If the given `src` (or `/proc/meminfo`) are not available, `None`
    is returned.
    """
    if not os.path.isfile(src):
        return None
    with open(src, 'r') as fd:
        lines = fd.read().splitlines()
    result = MemInfo()
    for line in lines:
        if ':' not in line:
            # Not a `key: value` line.
            continue
        key, value = line.split(':', 1)
        result[key] = int(value.split(' kB', 1)[0])
    return result
815
def html2dict(value=None,portal_language='en'):
    """Transforms a localized HTML text string into a dictionary.

    Different languages must be separated by ``>>xy<<`` whereas
    xy is the language code. Text parts without correct leading
    language separator - usually the first part has no language
    descriptor - are interpreted as texts in the portal's language.

    Returns a dict mapping language codes to the respective text,
    wrapped in a ``div`` element. Values that provide no ``split``
    (like ``None``) result in an empty dict.
    """
    try:
        parts = value.split('>>')
    except AttributeError:
        # Not a string at all.
        return {}
    elements = {}
    lang = portal_language
    for part in parts:
        text = part
        if part[2:4] == u'<<':
            # The part starts with a language code plus separator.
            lang = str(part[0:2].lower())
            text = part[4:]
        elements[lang] = renderElement(u'div id="html"',
            contents=text)
    return elements
841
def rest2dict(value=None,portal_language='en'):
    """Transforms a localized REST text string into a dictionary.

    Different languages must be separated by ``>>xy<<`` whereas
    xy is the language code. Text parts without correct leading
    language separator - usually the first part has no language
    descriptor - are interpreted as texts in the portal's language.

    Returns a dict mapping language codes to the respective text,
    rendered to HTML and wrapped in a ``div`` element. Values that
    provide no ``split`` (like ``None``) result in an empty dict.
    """
    try:
        parts = value.split('>>')
    except AttributeError:
        # Not a string at all.
        return {}
    elements = {}
    lang = portal_language
    for part in parts:
        text = part
        if part[2:4] == u'<<':
            # The part starts with a language code plus separator.
            lang = str(part[0:2].lower())
            text = part[4:]
        elements[lang] = renderElement(u'div id="rest"',
            contents=ReST2HTML(text))
    return elements
867
868
869
class FormVarParser(HTMLParser):
    """An HTML parser that collects names and values of form elements.

       While being fed with an HTML document, every starttag is
       examined. If a tag provides both, a `name` and a `value`
       attribute, the pair is stored as entry in the dict
       `form_vars` of the parser instance.
    """

    def __init__(self):
        HTMLParser.__init__(self)  # old-style class - no super()
        self.form_vars = {}

    def handle_starttag(self, tag, attrs):
        # `attrs` is a list of (key, value) pairs; for repeated keys
        # the last value wins.
        attributes = dict(attrs)
        try:
            name, value = attributes['name'], attributes['value']
        except KeyError:
            # Tag lacks `name` or `value`: nothing to store.
            return
        self.form_vars[name] = value
889
890
def extract_formvars(html_code):
    """Extract keys and values from an HTML form as dict.

       `html_code` is parsed by a fresh :class:`FormVarParser`; the
       dict of variables collected by that parser is returned.

       No text, no values::

         >>> extract_formvars("")
         {}

       Simple input tags normally provide name and value::

         >>> extract_formvars("<input type='text' name='foo' value='bar'>")
         {'foo': 'bar'}

       The sample doc we stored in tests is a bit more difficult::

         >>> html_path = os.path.join(os.path.dirname(__file__),
         ...                          'tests', 'sample_response.html')
         >>> html_code = open(html_path, 'r').read()
         >>> import pprint
         >>> pprint.pprint(extract_formvars(html_code))
         {'AMOUNT': '100',
         ...
          'TRANS_NUM': '01ESA20190916134824YA3YJ8'}

    """
    parser = FormVarParser()
    parser.feed(html_code)
    return parser.form_vars
Note: See TracBrowser for help on using the repository browser.