source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 6362

Last change on this file since 6362 was 6113, checked in by uli, 14 years ago
  • Remove trailing whitespaces.
  • Extends docs.
File size: 11.3 KB
Line 
1"""General helper functions for WAeUP.
2"""
3import os
4import re
5import sys
6import shutil
7import grok
8from cStringIO import StringIO
9from docutils.core import publish_string
10from zope.component.interfaces import IFactory
11from zope.interface import implementedBy
12from zope.schema import getFieldNames
13from zope.schema.fieldproperty import FieldProperty
14
def removeFileOrDirectory(filepath):
    """Remove a file or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves and never followed: the
    link target stays untouched, no matter whether it is a file, a
    directory or missing (dangling link).

    `filepath` is the path to remove; relative paths are resolved
    against the current working directory.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` (other than `exists`) is also true for dangling
    # symlinks, which we want to get rid of as well.
    if not os.path.lexists(filepath):
        return
    # `isdir` follows symlinks, but `shutil.rmtree` refuses to work on
    # them. Links to directories must therefore be unlinked like files.
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
30
def copyFileSystemTree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised.

    If `overwrite` is true, any same named objects in `dst` will be
    removed and replaced. Otherwise these objects will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory.

    This function returns a list of names of non-copied items, i.e.
    of those top-level entries in `src` that already existed in `dst`
    and were not overwritten.

    Unix hidden files and directories (starting with '.') on top-level
    of `src` are not processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any true value enables overwriting (as documented and in
            # line with the handling of `del_old`), not only ``True``.
            if overwrite:
                removeFileOrDirectory(itemdst)
            else:
                not_copied.append(item)
                continue

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            removeFileOrDirectory(itemsrc)
    return not_copied
76
77
def getInnerHTMLPart(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    This also works if the body tag is not wrapped by other markup:

       >>> getInnerHTMLPart('<body>My Body</body>')
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> getInnerHTMLPart(doc)
       '<html>without body nor form</html>'

    """
    # Patterns are tried in order of preference; the first group of
    # the first matching pattern is the result. The leading and
    # trailing ``.*`` (instead of ``.+``) also accept tags located at
    # the very beginning or end of the input.
    patterns = (
        r'^.*(<form[^>]*>.*</form>).*$',  # the complete form element
        r'^.*<body[^>]*>(.*)</body>.*$',  # contents of the body only
        )
    for pattern in patterns:
        match = re.match(pattern, html_code, re.DOTALL)
        if match is not None:
            return match.group(1)
    # No <form> and no <body> tag...
    return html_code
122
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.sirp.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.sirp.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain unicode string. Mind that a trailing comma here would
    # silently turn the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class (or other callable) to create instances from. Must be
    # set in derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but not passed
        on: ``factory`` is always called without any arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
218
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect the warnings.

    The result is a tuple ``(<HTML_CODE>, <WARNINGS>)``.
    ``<HTML_CODE>`` is the unicode HTML code generated from
    `source_string`, reduced to its inner part by
    :func:`getInnerHTMLPart`. ``<WARNINGS>`` is a string holding all
    messages emitted during rendering or ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    message_stream = StringIO()
    # First pass: report everything into our own stream.
    verbose_settings = {
        'report_level': 0,
        'warning_stream': message_stream,
        }
    document = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)
    collected = message_stream.getvalue()
    if collected:
        # Second pass: render again, this time with no warnings
        # inline...
        silent_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': message_stream,
            }
        document = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=silent_settings)
    html = getInnerHTMLPart(document).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    # An empty message string is reported as ``None``.
    return html, (collected or None)
309
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings thrown
    away: messages about too short headings, etc. are silently
    dropped. Call :func:`ReST2HTML_w_warnings` directly if you are
    interested in them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the ``(html, warnings)`` tuple is wanted.
    return ReST2HTML_w_warnings(source_string)[0]
355
def attrs_to_fields(cls):
    """Install a FieldProperty on `cls` for each schema field.

    The field names are taken from the first interface that
    `implementedBy` yields for `cls`. For each of them the class
    attribute of that name is replaced by a `FieldProperty` built
    from the respective interface field.

    The modified class is returned, so that with Python >= 2.6 this
    function can even be used as a class decorator.
    """
    provided = list(implementedBy(cls))
    schema = provided[0]
    for name in getFieldNames(schema):
        field_property = FieldProperty(schema[name])
        setattr(cls, name, field_property)
    return cls
Note: See TracBrowser for help on using the repository browser.