Context navigation

source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 5988

Last change on this file since 5988 was 5988, checked in by Henrik Bettermann, 14 years ago
Implement title_prefix vocabulary. Remove redundant getName method and replace by the longtitle property.
File size: 10.9 KB

Line
1	"""General helper functions for WAeUP.
2	"""
3	import os
4	import re
5	import sys
6	import shutil
7	import grok
8	from cStringIO import StringIO
9	from docutils.core import publish_string
10	from zope.component.interfaces import IFactory
11	from zope.interface import implementedBy
12
def removeFileOrDirectory(filepath):
    """Remove a file or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are never followed: the link itself is removed,
    no matter whether it points to a file, to a directory or to
    nothing at all (dangling link). The link target is left untouched.

    `filepath` is the path to remove; relative paths are resolved
    against the current working directory.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` (unlike `exists`) is also true for dangling symlinks,
    # which would otherwise be silently left behind.
    if not os.path.lexists(filepath):
        return
    # `isdir` follows symlinks, but `shutil.rmtree` refuses to work on
    # a symlink. Therefore only real directories are removed recursively.
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
28
def copyFileSystemTree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised. The same happens if one of the paths is not a directory.

    If `overwrite` is true, any same named objects will be
    overwritten. Otherwise these files will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory. Items that were not copied are kept.

    This function returns a list of non-copied files (names relative
    to `src`).

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue  # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any true value enables overwriting (not only the `True`
            # singleton), as promised by the docstring.
            if not overwrite:
                not_copied.append(item)
                continue
            # Make room for the new copy.
            removeFileOrDirectory(itemdst)

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            removeFileOrDirectory(itemsrc)
    return not_copied
74
75
def getInnerHTMLPart(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the enclosing
    ``<form>`` tags).

    If there is no form part, try to return the body part contents
    (without the enclosing ``<body>`` tags).

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    Opening tags may carry attributes:

       >>> doc = '<html><body class="x">My Body</body></html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> getInnerHTMLPart(doc)
       '<html>without body nor form</html>'

    """
    # A form only counts if at least one character precedes and
    # follows it, i.e. if it is embedded in some surrounding markup.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included, look for the contents of <body>.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
120
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...     # Some class we want to get instances of.
       ...     pass
       >>> class MyObjectFactory(FactoryBase):
       ...     # This is the factory for MyObject instances
       ...     grok.name(u'waeup.sirp.factory.MyObject')
       ...     factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...     'MyObjectFactory', MyObjectFactory
       ...     )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...     IFactory, name='waeup.sirp.factory.MyObject'
       ...     )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain string; a trailing comma here would turn it into a tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to instantiate; must be set by derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Any positional or keyword arguments are accepted, so that the
        factory can be called without arguments as well, but they are
        not passed on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.

        Required by IFactory.
        """
        return implementedBy(self.factory)
216
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText to HTML and collect the warnings.

    The result is a tuple ``(<HTML_CODE>, <WARNINGS>)``.
    ``<HTML_CODE>`` is the HTML generated from `source_string` as a
    unicode string, ``<WARNINGS>`` is a string with all warning
    messages emitted while rendering or ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

       >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
       >>> source = '''
       ... Headline
       ... ========
       ...
       ... - A list item
       ... - Another item
       ...
       ... Thanks for watching!
       ... '''
       >>> html, warnings = ReST2HTML_w_warnings(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <ul class="simple">
       <li>A list item</li>
       <li>Another item</li>
       </ul>
       <p>Thanks for watching!</p>
       </div>

    Here no warnings happened, so the `warnings` are ``None``:

       >>> warnings is None
       True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

       >>> source = '''
       ... Headline
       ... ======
       ...
       ... Thanks for watching!
       ... '''
       >>> html, warnings = ReST2HTML_w_warnings(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <p>Thanks for watching!</p>
       </div>

       >>> print warnings
       <string>:3: (WARNING/2) Title underline too short.
       <BLANKLINE>
       Headline
       ======
       <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()

    def render(**overrides):
        # All render passes report into the same stream.
        overrides['warning_stream'] = stream
        return publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)

    # First pass: report everything, so that all messages are caught.
    fulldoc = render(report_level=0)
    stream.seek(0)
    warning_msgs = stream.read()
    if warning_msgs:
        # Second pass: the same document, but with levels set that
        # high that no message is put inline into the output.
        fulldoc = render(report_level=10000, halt_level=10000)
    else:
        warning_msgs = None

    html = getInnerHTMLPart(fulldoc).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
307
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML.

    Warnings (too short headings, etc.) are thrown away silently. If
    you are interested in them, use :func:`ReST2HTML_w_warnings`
    instead.

    The HTML code is returned as unicode string.

    A regular document will be rendered like this:

       >>> source = '''
       ... Headline
       ... ========
       ...
       ... Thanks for watching!
       ... '''
       >>> html = ReST2HTML(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <p>Thanks for watching!</p>
       </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

       >>> source = '''
       ... Headline
       ... ======
       ...
       ... Thanks for watching!
       ... '''
       >>> html = ReST2HTML(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <p>Thanks for watching!</p>
       </div>

    """
    # Only the HTML part of the (html, warnings) pair is of interest.
    rendered = ReST2HTML_w_warnings(source_string)
    return rendered[0]

Note: See TracBrowser for help on using the repository browser.

Download in other formats: