Context navigation

source: main/waeup.sirp/trunk/src/waeup/sirp/utils/helpers.py @ 6741

Last change on this file since 6741 was 6531, checked in by uli, 13 years ago
Make sure we compare files from the beginning.
File size: 12.4 KB

Line
1	"""General helper functions for WAeUP.
2	"""
3	import os
4	import re
5	import sys
6	import shutil
7	import grok
8	from cStringIO import StringIO
9	from docutils.core import publish_string
10	from zope.component.interfaces import IFactory
11	from zope.interface import implementedBy
12	from zope.schema import getFieldNames
13	from zope.schema.fieldproperty import FieldProperty
14	from zope.security.interfaces import NoInteraction
15	from zope.security.management import getInteraction
16
17	BUFSIZE = 8 * 1024
18
def removeFileOrDirectory(filepath):
    """Remove a file or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, their targets are left
    untouched. This also covers dangling links and links pointing to
    directories.

    Always returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    if os.path.islink(filepath):
        # `os.path.exists` follows links (so dangling links look like
        # missing paths) and `shutil.rmtree` refuses to work on links,
        # therefore links are handled before anything else.
        os.unlink(filepath)
        return
    if not os.path.exists(filepath):
        return
    if os.path.isdir(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
34
def copyFileSystemTree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory src to directory dst.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised.

    If `overwrite` is true, any same named objects will be
    overwritten. Otherwise these files will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the src directory.

    This function returns a list of non-copied files (names relative
    to `src`).

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue  # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Plain truth test: any true value (not only the ``True``
            # singleton) requests overwriting, as documented above.
            if overwrite:
                removeFileOrDirectory(itemdst)
            else:
                not_copied.append(item)
                continue

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            # Only items that were really copied get removed from src.
            removeFileOrDirectory(itemsrc)
    return not_copied
80
81
def getInnerHTMLPart(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> getInnerHTMLPart(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> getInnerHTMLPart(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> getInnerHTMLPart(doc)
       '<html>without body nor form</html>'

    """
    # A form is returned including its tags. Note that the pattern
    # requires some text before and after the form element.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included: return the body contents (tags excluded).
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
126
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.sirp` we use factories extensively for
    batching. While processing a batch some importer looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.sirp.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...     # Some class we want to get instances of.
       ...     pass
       >>> class MyObjectFactory(FactoryBase):
       ...     # This is the factory for MyObject instances
       ...     grok.name(u'waeup.sirp.factory.MyObject')
       ...     factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.sirp.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.sirp`
    system.

       >>> grok.testing.grok('waeup.sirp.utils.helpers')
       >>> grok.testing.grok_component(
       ...     'MyObjectFactory', MyObjectFactory
       ...     )
       True

    After grokking we (and importers) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.sirp.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...     IFactory, name='waeup.sirp.factory.MyObject'
       ...     )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain string; a trailing comma here would turn it into a tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Any positional and keyword arguments are accepted, so that the
        factory can be called without arguments as shown in the class
        docstring. They are not passed on to ``factory``.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.
        """
        # Required by IFactory
        return implementedBy(self.factory)
222
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.sirp.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()

    # First pass: report everything into our own stream.
    verbose_settings = {
        'report_level': 0,
        'warning_stream': stream,
        }
    document = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=verbose_settings)
    messages = stream.getvalue()

    if messages:
        # Second pass: render again, this time with no warnings inline...
        quiet_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            }
        document = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=quiet_settings)
    else:
        messages = None

    html = getInnerHTMLPart(document).strip()
    if isinstance(html, unicode):
        return html, messages
    return html.decode('utf-8'), messages
313
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is a thin wrapper around :func:`ReST2HTML_w_warnings` that
    returns the HTML part only. Any warnings about too short
    headings, etc. are silently discarded. Use
    :func:`ReST2HTML_w_warnings` if you want to get any warnings.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Second element of the result (the warnings) is dropped on purpose.
    rendered = ReST2HTML_w_warnings(source_string)
    return rendered[0]
359
def attrs_to_fields(cls):
    """Turn the attributes of a class into FieldProperty instances.

    For each field name of the first interface listed by
    ``implementedBy(cls)`` a :class:`FieldProperty` is set as class
    attribute of `cls`. The (modified) class is returned.

    With Python >= 2.6 we can even use this function as a class decorator.
    """
    interfaces = list(implementedBy(cls))
    schema = interfaces[0]  # only the first interface is considered
    for name in getFieldNames(schema):
        field = schema[name]
        setattr(cls, name, FieldProperty(field))
    return cls
369
def get_current_principal():
    """Get the 'current' principal.

    This method works without a request. Examining a request is the
    regular (and recommended) way to get a principal involved
    'currently'.

    Use this method only if you really have no access to the current
    request.

    The principal is taken from the first participation of the
    current interaction. Returns ``None`` when no principal is
    involved (for instance during tests).
    """
    try:
        return getInteraction().participations[0].principal
    except (NoInteraction, IndexError):
        # Either there is no interaction at all or it has no
        # participations.
        return None
390
def cmp_files(file_descr1, file_descr2):
    """Compare two files by their file descriptors.

    Both files are rewound to their beginning and then read in chunks
    of ``BUFSIZE``.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for descr in (file_descr1, file_descr2):
        descr.seek(0)  # make sure we compare from the beginning
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files are exhausted without any difference found.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False

Note: See TracBrowser for help on using the repository browser.

Download in other formats: