Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 8758

Last change on this file since 8758 was 8739, checked in by Henrik Bettermann, 12 years ago
Improve logging (part 1).
Property svn:keywords set to `Id`
File size: 22.0 KB

Line
1	## $Id: helpers.py 8739 2012-06-17 12:13:45Z henrik $
2	##
3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""General helper functions for Kofa.
19	"""
20	import csv
21	import datetime
22	import imghdr
23	import os
24	import pytz
25	import re
26	import shutil
27	import tempfile
28	import grok
29	from cStringIO import StringIO
30	from docutils.core import publish_string
31	from zope.component import getUtility
32	from zope.component.interfaces import IFactory
33	from zope.interface import implementedBy
34	from zope.interface.interface import Method, Attribute
35	from zope.schema import getFieldNames
36	from zope.schema.fieldproperty import FieldProperty
37	from zope.security.interfaces import NoInteraction
38	from zope.security.management import getInteraction
39	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
40	from waeup.kofa.interfaces import MessageFactory as _
41
# Number of bytes requested per ``read()`` call when files are read
# chunk-wise in this module (see :func:`cmp_files`).
BUFSIZE = 8 * 1024
43
def remove_file_or_directory(filepath):
    """Delete `filepath`, be it a regular file or a directory tree.

    In contrast to :func:`shutil.rmtree` a path that does not exist
    is accepted as well (nothing happens then), and a path that is
    not a directory is unlinked as a plain file.

    Always returns ``None``.
    """
    target = os.path.abspath(filepath)
    if os.path.isdir(target):
        # Directories are removed with all their contents.
        shutil.rmtree(target)
    elif os.path.exists(target):
        os.unlink(target)
    return
59
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the entries of directory `src` into directory `dst`.

    Both paths must exist and must be directories, otherwise a
    :exc:`ValueError` is raised.

    If `overwrite` is ``True``, entries in `dst` that have the same
    name as an entry in `src` are removed and replaced. Otherwise
    such entries are left untouched and their names are reported in
    the returned list.

    If `del_old` is true, each entry is removed from `src` after it
    was copied.

    Entries whose name starts with a dot (Unix hidden files and
    directories) are not processed at all.

    Returns the list of names of entries that were not copied
    because they already existed in `dst`.
    """
    # The checks are performed in exactly this order; the first one
    # that fails determines the error message.
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
        )
    for predicate, path, message in preconditions:
        if not predicate(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            continue # We do not copy hidden stuff...
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                # Keep the existing entry and report the name.
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)

        if del_old:
            remove_file_or_directory(source_item)
    return skipped
105
106
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # ``[^>]*`` accepts attributes inside the opening tag and ``.*``
    # accepts arbitrary tag contents (newlines included, due to
    # re.DOTALL). Without these quantifiers only one-character
    # contents could ever match.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included, look for a <body> instead.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
151
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass() # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain text value; a trailing comma here would silently turn
    # the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Any positional or keyword arguments are accepted but not
        passed on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
247
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText as HTML and collect warnings separately.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()
    # First pass: report everything, messages go to `stream`.
    html_doc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides={
            'report_level': 0,
            'warning_stream': stream,
            })
    stream.seek(0)
    messages = stream.read()
    if messages:
        # Render again, this time with no warnings inline...
        html_doc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides={
                'report_level': 10000,
                'halt_level': 10000,
                'warning_stream': stream,
                })
    else:
        messages = None
    html = get_inner_HTML_part(html_doc).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    return html, messages
338
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings thrown
    away. Use that function directly if you are interested in
    messages about too short headings, etc.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the (html, warnings) result is wanted.
    return ReST2HTML_w_warnings(source_string)[0]
384
def attrs_to_fields(cls):
    """Install a FieldProperty on `cls` for each schema field.

    The schema is the first interface reported by
    ``implementedBy(cls)``. For every field name of that interface
    the equally named class attribute is set to a `FieldProperty`
    bound to the respective field.

    Returns `cls`, so with Python >= 2.6 this function can also be
    used as a class decorator.
    """
    schema = list(implementedBy(cls))[0]
    for name in getFieldNames(schema):
        field_property = FieldProperty(schema[name])
        setattr(cls, name, field_property)
    return cls
394
def get_current_principal():
    """Return the principal of the current interaction.

    No request is needed for this lookup. Nevertheless, examining
    the request is the regular (and recommended) way to find the
    principal involved 'currently'; use this function only if you
    really have no access to the current request.

    Returns ``None`` if there is no interaction or if the
    interaction has no participations (for instance during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # Either no interaction at all or no participations present.
        return None
415
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have the same content.

    Both file objects are rewound to position 0 and then read in
    chunks of ``BUFSIZE`` until a difference or the end is found.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    file_descr1.seek(0)
    file_descr2.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files are exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
430
def string_from_bytes(number):
    """Turn a number of bytes into some textual representation.

    Values below 1024 are given in bytes, values below 1024**2 in
    whole kilobytes (rounded down) and anything bigger in megabytes
    or gigabytes with two decimal places. The result does not depend
    on whether `number` is an integer or a float.

    Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Explicit floor division: whole kilobytes for ints and floats.
        return u'%d KB' % (number // 1024,)
    # Convert to float first, so that integer input is not truncated
    # to whole units before the two decimal places are computed.
    if number < 1024 ** 3:
        return u'%.2f MB' % (float(number) / 1024 ** 2,)
    return u'%.2f GB' % (float(number) / 1024 ** 3,)
456
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Objects that provide a usable ``fileno()`` are examined with
    :func:`os.fstat`. Objects without ``fileno`` and objects whose
    ``fileno()`` is not supported (like :class:`io.BytesIO`) are
    measured by seeking to their end.

    Example:

        >>> from cStringIO import StringIO
        >>> file_size(StringIO('my file content'))
        15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, IOError, ValueError):
        # No `fileno` at all, or one that only exists to signal
        # "unsupported" (io.UnsupportedOperation is both an IOError
        # and a ValueError).
        descriptor = None
    if descriptor is not None:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, 2) # seek to last position in file
    return file_like_obj.tell()
478
def get_user_account(request):
    """Return the local user account of the request's principal.

    The account is looked up by principal id in the authenticator
    plugin registered under the name ``'users'``; whatever that
    plugin's ``getAccount`` delivers is returned.
    """
    user_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(user_id)
486
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Collect the attribute names of an interface and all its bases.

    Base interfaces are searched recursively.

    By default names of pure attributes (i.e. descriptions whose
    class is exactly `zope.interface.Attribute`) and of methods are
    left out. Names of typical fields derived from zope.schema are
    included.

    Names listed in `omit` are always left out.

    Returns an unsorted list of strings.
    """
    # Collect all interfaces (also bases): grow the set until a
    # round adds nothing new.
    known = set((iface,))
    while True:
        grown = set(known)
        for candidate in tuple(grown):
            grown = grown | set(candidate.getBases())
        if grown == known:
            # No new interfaces found, list complete
            break
        known = grown
    # Collect (filtered) names of collected interfaces
    names = []
    for schema in known:
        for name, descr in schema.namesAndDescriptions():
            unwanted = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                )
            if not unwanted:
                names.append(name)
    return names
524
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    `preferred_list` may be any iterable of tokens.

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred token; slots of tokens that never show
    # up stay ``None`` and are dropped below.
    slots = [None] * len(preferred)
    others = []
    for title, code in tuples_iterable:
        if code in preferred:
            slots[preferred.index(code)] = (title, code)
        else:
            others.append((title, code))
    found = [entry for entry in slots if entry is not None]
    return tuple(found + others)
566
def now(tz=None):
    """Return the current datetime, expressed in timezone `tz`.

    The current UTC time is taken and handed to :func:`to_timezone`.
    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
575
def to_timezone(dt, tz=None):
    """Express datetime `dt` in timezone `tz`.

    A 'naive' `dt` (one without `tzinfo`) is taken to be UTC.

    Without `tz` the value is shifted to UTC.

    Values that are not :class:`datetime.datetime` instances are
    handed back unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source zone, convert, then normalize again in
    # the target zone.
    shifted = aware.tzinfo.normalize(aware).astimezone(target)
    return target.normalize(shifted)
594
def get_fileformat(path, bytestream=None):
    """Guess the format of a media file from its header data.

    The checks are not very thorough, but they do not rely on the
    filename extension or similar. The detection itself is done by
    the stdlib `imghdr` library, which looks at header data (Magic
    Words).

    If `bytestream` is not ``None`` it is handed to `imghdr`
    together with `path`; `imghdr` then examines the bytestream
    instead of reading the file.

    Returns the filetype as string (something like ``'jpg'``) if
    the format is recognized, ``None`` else. The `imghdr` results
    ``'jpeg'`` and ``'tiff'`` are reported as ``'jpg'`` and
    ``'tif'``.

    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.

    More filetypes (though untested in waeup.kofa) are recognized
    automatically; see the `imghdr` docs for a complete list.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Map the long names delivered by imghdr to the short ones.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
626
def check_pdf(bytestream, file):
    """Return ``'pdf'`` for data that starts like a PDF, else ``None``.

    The signature follows the test functions of the stdlib `imghdr`
    library, so that this function can be used as a plugin there.

    If `file` is not ``None``, the passed `bytestream` is ignored:
    the first four bytes are read from `file` instead and the file
    is rewound to position 0 afterwards.
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    return 'pdf' if bytestream.startswith('%PDF') else None
640
# Register check_pdf as header check function with `imghdr`, so that
# `imghdr.what` (which get_fileformat relies on) reports 'pdf' for PDF
# data. The membership test avoids adding the function a second time.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
644
def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files. The
    columns of the result are the sorted union of the columns found
    in the first data row of each file.

    Also broken/empty CSV files can be handled.

    All files opened here are closed again before returning, so the
    result file is completely written when its path is handed out.
    """
    def first_row(path):
        # Sniff the col names: keys of the first data row, or an
        # empty dict if the file has no data row at all.
        with open(path, 'rb') as source:
            try:
                return next(csv.DictReader(source))
            except StopIteration:
                return dict()

    fieldnames = sorted(set(first_row(path1)) | set(first_row(path2)))
    # now read/write the real data
    handle, tmp_path = tempfile.mkstemp()
    with os.fdopen(handle, 'wb') as target:
        writer = csv.DictWriter(target, fieldnames)
        writer.writerow(dict((x, x) for x in fieldnames)) # header
        for path in (path1, path2):
            with open(path, 'rb') as source:
                for row in csv.DictReader(source):
                    writer.writerow(row)
    return tmp_path
685
# Save function used for save methods in pages
def msave(view, **data):
    """Apply form `data` to the context of `view` and log the changes.

    The data is applied via ``view.applyData``, a confirmation
    message is flashed on the view and, if any fields were reported
    as changed, their names are written to the logger of the current
    site.

    Returns ``None``.
    """
    changed_fields = view.applyData(view.context, **data)
    # Turn list of lists into single list
    if changed_fields:
        changed_fields = [
            name
            for names in changed_fields.values()
            for name in names]
    fields_string = ' + '.join(changed_fields)
    view.flash(_('Form has been saved.'))
    ob_class = view.__implemented__.__name__.replace('waeup.kofa.','')
    if not fields_string:
        # Nothing changed, nothing to log.
        return
    grok.getSite().logger.info('%s - saved: %s' % (ob_class, fields_string))
    return

Note: See TracBrowser for help on using the repository browser.

Download in other formats: