source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 6001

Last change on this file since 6001 was 5988, checked in by Henrik Bettermann, 14 years ago

Implement title_prefix vocabulary. Remove redundant getName method and replace by the longtitle property.

File size: 10.9 KB
Line 
1"""General helper functions for WAeUP.
2"""
3import os
4import re
5import sys
6import shutil
7import grok
8from cStringIO import StringIO
9from docutils.core import publish_string
10from zope.component.interfaces import IFactory
11from zope.interface import implementedBy
12
def removeFileOrDirectory(filepath):
    """Remove a file, a directory tree or a symbolic link.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, their targets are never
    touched. This includes dangling links (whose target does not
    exist) and links pointing to directories, for which
    :func:`shutil.rmtree` would refuse to work.

    `filepath` is the path to remove; relative paths are interpreted
    relative to the current working directory.

    Returns ``None``.
    """
    # abspath() normalizes the path but does not resolve symlinks, so
    # a link is still recognizable as a link afterwards.
    filepath = os.path.abspath(filepath)
    if os.path.islink(filepath):
        # Must be checked first: exists() and isdir() follow links and
        # would either miss a dangling link or hand a link to rmtree().
        os.unlink(filepath)
        return
    if not os.path.exists(filepath):
        return
    if os.path.isdir(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
28
def copyFileSystemTree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of the paths is not a directory.

    If `overwrite` is true, any same named objects in `dst` will be
    removed and replaced by the ones from `src`. Otherwise these
    files will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory. Items that were not copied stay in `src`.

    This function returns a list of names of non-copied files and
    directories (names relative to `src`).

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    checked = (('source', src), ('destination', dst))
    # First make sure both paths exist, then that both are dirs.
    for kind, path in checked:
        if not os.path.exists(path):
            raise ValueError('%s path does not exist: %s' % (kind, path))
    for kind, path in checked:
        if not os.path.isdir(path):
            raise ValueError(
                '%s path is not a directory: %s' % (kind, path))

    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Plain truthiness test (like for `del_old`): any true
            # value requests overwriting, not only the `True` object.
            if not overwrite:
                not_copied.append(item)
                continue
            removeFileOrDirectory(itemdst)

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            removeFileOrDirectory(itemsrc)
    return not_copied
74
75
def getInnerHTMLPart(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the surrounding
    ``<form>`` tags).

    If there is no form part, try to return the body part contents
    (without the ``<body>`` tags).

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> getInnerHTMLPart(doc)
       '<html>without body nor form</html>'

    The form or body may also start or end the passed in snippet:

       >>> getInnerHTMLPart('<body>My Body</body>')
       'My Body'

       >>> getInnerHTMLPart('<form>My Form</form>Trailing Trash')
       '<form>My Form</form>'

    """
    # The leading and trailing parts are optional (``.*``), so that
    # snippets starting or ending with the searched tag are handled as
    # well. The greedy prefix makes the *last* opening tag win if
    # there are several of them.
    form_match = re.match(
        r'^.*(<form[^>]*>.*</form>).*$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included, look for a <body>...
    body_match = re.match(
        r'^.*<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
120
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will *not* be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.sirp.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.sirp.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting *must* even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain unicode string. Beware of trailing commas here: they
    # would silently turn the title into a one-element tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to instantiate. Must be set by derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object, i.e. of the
        class set as ``factory`` attribute.

        Positional and keyword arguments are accepted but not passed
        on: ``factory`` is always called without any arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Get the interfaces implemented by ``factory`` instances.

        Required by IFactory.
        """
        return implementedBy(self.factory)
216
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    reduced to its inner part by :func:`getInnerHTMLPart`.
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were no warnings.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    def render(overrides):
        # Run docutils on the source with the given settings.
        return publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)

    stream = StringIO()
    # First pass: report everything into our own stream.
    fulldoc = render({
        'report_level': 0,
        'warning_stream': stream,
        })
    stream.seek(0)
    messages = stream.read()
    if messages:
        # Render again, this time with no warnings inline...
        fulldoc = render({
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            })
    else:
        # Nothing was reported at all.
        messages = None
    html = getInnerHTMLPart(fulldoc).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    return html, messages
307
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is a thin wrapper around :func:`ReST2HTML_w_warnings` which
    returns the HTML part only. Any warnings about too short headings,
    etc. are silently discarded. Use :func:`ReST2HTML_w_warnings` if
    you want to get any warnings.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # First item is the HTML, second the (here unwanted) warnings.
    return ReST2HTML_w_warnings(source_string)[0]
Note: See TracBrowser for help on using the repository browser.