source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 5971

Last change on this file since 5971 was 5876, checked in by uli, 14 years ago

Fix ReST2HTML() to always deliver unicode strings. Otherwise we could
get encoding errors when using this function with HTML forms.

File size: 11.5 KB
Line 
1"""General helper functions for WAeUP.
2"""
3import os
4import re
5import sys
6import shutil
7import grok
8from cStringIO import StringIO
9from docutils.core import publish_string
10from zope.component.interfaces import IFactory
11from zope.interface import implementedBy
12
def removeFileOrDirectory(filepath):
    """Remove a file, symbolic link or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are never followed: the link itself is removed, no
    matter whether it points to a directory, to a file or to nothing
    at all (dangling link). The link target is left untouched.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` (unlike `exists`) is also true for dangling symlinks,
    # which would otherwise be left behind silently.
    if not os.path.lexists(filepath):
        return
    # `shutil.rmtree` refuses to operate on symbolic links, so only
    # real directories are passed to it; everything else is unlinked.
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
28
def copyFileSystemTree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of the paths is not a directory.

    If `overwrite` is true (any true value is accepted), any same
    named objects in `dst` will be removed and replaced. Otherwise
    these files will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory.

    This function returns a list of non-copied files (names relative
    to `src`).

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Plain truth test: an identity check against ``True``
            # would treat truthy values like ``1`` as "do not
            # overwrite", contradicting the documented behaviour.
            if overwrite:
                removeFileOrDirectory(itemdst)
            else:
                not_copied.append(item)
                continue

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            # Only items that were really copied get removed.
            removeFileOrDirectory(itemsrc)
    return not_copied
74
75
def getInnerHTMLPart(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the surrounding
    ``<form>`` tags). If there are several forms, the last one is
    returned.

    If there is no form part, try to return the body part contents
    (without the ``<body>`` tags).

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    This also works if the form is the very first or very last thing
    in the snippet:

       >>> doc = '<form>My Form</form>Outside the form'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> getInnerHTMLPart(doc)
       '<html>without body nor form</html>'

    """
    # The leading/trailing parts are matched with `.*` (not `.+`) so
    # that a tag at the very beginning or end of the snippet is found
    # as well. The greedy leading `.*` makes the *last* opening
    # ``<form ...>`` tag win if there are several.
    form_match = re.match(
        r'^.*(<form[^>]*>.*</form>).*$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included, look for a <body>.
    body_match = re.match(
        r'^.*<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
120
def getName(context):
    """Construct a name out of an object with prefix and title.

    The `context` has to provide `title_prefix` and `title`
    attributes.

        >>> from waeup.sirp.utils.helpers import getName
        >>> class FakeObject(object):
        ...     title_prefix = 'department'
        ...     title = 'Strange Things'
        >>> getName(FakeObject())
        'Department of Strange Things'

    As we can see in the result the first character of the
    `title_prefix` is rendered uppercase; the rest of the prefix is
    left untouched.

    An empty `title_prefix` does not raise an error, the prefix part
    of the result is simply empty then.

    """
    prefix = context.title_prefix
    # Slicing (instead of indexing with ``[0]``) keeps this safe for
    # empty prefixes.
    prefix = prefix[:1].upper() + prefix[1:]
    return '%s of %s' % (prefix, context.title)
141
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.sirp.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.sirp.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain unicode string. (A trailing comma here would silently
    # turn the title into a one-element tuple.)
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to create instances of. Must be set in derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object, i.e. the result
        of calling ``self.factory`` without any arguments.
        """
        # NOTE(review): `args` and `kw` are accepted but not passed on
        # to ``self.factory``. Kept as-is, as forwarding them could
        # break callers that rely on arguments being ignored.
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``self.factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
237
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect any warnings.

    The result is a tuple ``(<HTML_CODE>, <WARNINGS>)``.
    ``<HTML_CODE>`` is the (unicode) HTML code generated from
    `source_string`, ``<WARNINGS>`` is a string with all warning
    messages emitted while rendering or ``None`` if there were none.

    A regular multi-line ReStructuredText string is turned into HTML
    code:

        >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Nothing to complain about here, so `warnings` is ``None``:

        >>> warnings is None
        True

    If rendering triggers warnings, they are handed back in
    ``warnings``. Let's render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    So the warnings do not show up inside the generated document but
    are available separately, as a string or ``None``.
    """
    stream = StringIO()
    verbose_settings = {
        'report_level': 0,
        'warning_stream': stream,
        }
    fulldoc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)

    # Fetch everything docutils wrote to the warning stream.
    stream.seek(0)
    warning_msgs = stream.read()

    if warning_msgs:
        # The first run embedded the messages in the document as well,
        # so render once more with reporting effectively disabled.
        silent_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            }
        fulldoc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=silent_settings)
    else:
        # An empty message string means: no warnings at all.
        warning_msgs = None

    html = getInnerHTMLPart(fulldoc).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
328
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML.

    Warnings (too short headline underlines and the like) are
    dropped silently. If you are interested in them, use
    :func:`ReST2HTML_w_warnings` instead.

    The result is always a unicode string.

    This is how a regular document gets rendered:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with broken markup (here: a too short underline)
    gives a similar result:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the (html, warnings) tuple is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
Note: See TracBrowser for help on using the repository browser.