Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 11476

Last change on this file since 11476 was 11476, checked in by Henrik Bettermann, 11 years ago
Replace default FieldProperty docstring by field title.
Property svn:keywords set to `Id`
File size: 23.2 KB

Line
1	## $Id: helpers.py 11476 2014-03-07 09:32:43Z henrik $
2	##
3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""General helper functions for Kofa.
19	"""
20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
21	import datetime
22	import imghdr
23	import logging
24	import os
25	import pytz
26	import re
27	import shutil
28	import tempfile
29	import grok
30	from cStringIO import StringIO
31	from docutils.core import publish_string
32	from zope.component import getUtility
33	from zope.component.interfaces import IFactory
34	from zope.interface import implementedBy
35	from zope.interface.interface import Method, Attribute
36	from zope.schema import getFieldNames
37	from zope.schema.fieldproperty import FieldProperty
38	from zope.security.interfaces import NoInteraction
39	from zope.security.management import getInteraction
40	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
41
# Chunk size in bytes (8 KiB) used when reading files block-wise, for
# instance by :func:`cmp_files`.
BUFSIZE = 8 * 1024
43
44
def remove_file_or_directory(filepath):
    """Delete `filepath`, be it a regular file or a directory tree.

    Unlike :func:`shutil.rmtree` this helper silently accepts paths
    that do not exist and also removes plain files when the path
    turns out not to be a directory.
    """
    target = os.path.abspath(filepath)
    if os.path.isdir(target):
        shutil.rmtree(target)
    elif os.path.exists(target):
        os.unlink(target)
60
61
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the contents of directory `src` into directory `dst`.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised.

    If `overwrite` is ``True``, same-named objects in `dst` are
    removed and replaced. Otherwise they are left untouched.

    If `del_old` is true, every successfully copied file or directory
    is removed from `src` afterwards.

    Returns a list with the names of all items that were not copied.

    Unix hidden files and directories (names starting with ``'.'``)
    are not processed by this function.
    """
    if not os.path.exists(src):
        raise ValueError('source path does not exist: %s' % src)
    if not os.path.exists(dst):
        raise ValueError('destination path does not exist: %s' % dst)
    if not os.path.isdir(src):
        raise ValueError('source path is not a directory: %s' % src)
    if not os.path.isdir(dst):
        raise ValueError('destination path is not a directory: %s' % dst)

    skipped = []
    # Hidden entries are neither copied nor reported as skipped.
    visible = [name for name in os.listdir(src) if not name.startswith('.')]
    for name in visible:
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)

        if del_old:
            remove_file_or_directory(source_item)
    return skipped
107
108
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the ``<form>`` tags).

    If there is no form part, try to return the body part contents
    (without the ``<body>`` tags).

    If there is no body, return as-is.

    Both, form and body, are only detected if at least one character
    (normally the opening ``<html>`` tag) precedes them. A form must
    also be followed by at least one character.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # re.DOTALL lets '.' also match newlines, so multi-line markup is
    # handled. '[^>]*' accepts tags with or without attributes.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included, look for a body instead.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
153
154
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain string: a trailing comma here would turn the title into
    # a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to instantiate; must be set by derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Any positional or keyword arguments are accepted, so that the
        factory can be called without arguments as well, but they are
        not passed on to ``factory``.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.
        """
        # Required by IFactory
        return implementedBy(self.factory)
250
251
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

       >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
       >>> source = '''
       ... Headline
       ... ========
       ...
       ... - A list item
       ... - Another item
       ...
       ... Thanks for watching!
       ... '''
       >>> html, warnings = ReST2HTML_w_warnings(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <ul class="simple">
       <li>A list item</li>
       <li>Another item</li>
       </ul>
       <p>Thanks for watching!</p>
       </div>

    Here no warnings happened, so the `warnings` are ``None``:

       >>> warnings is None
       True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

       >>> source = '''
       ... Headline
       ... ======
       ...
       ... Thanks for watching!
       ... '''
       >>> html, warnings = ReST2HTML_w_warnings(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <p>Thanks for watching!</p>
       </div>

       >>> print warnings
       <string>:3: (WARNING/2) Title underline too short.
       <BLANKLINE>
       Headline
       ======
       <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    # First pass: report everything into our own stream.
    html = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides={
            'report_level': 0,
            'warning_stream': warning_stream,
            })
    warning_stream.seek(0)
    messages = warning_stream.read()
    if messages:
        # Render again, this time with no warnings inline...
        html = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides={
                'report_level': 10000,
                'halt_level': 10000,
                'warning_stream': warning_stream,
                })
    else:
        messages = None
    inner = get_inner_HTML_part(html).strip()
    if isinstance(inner, unicode):
        return inner, messages
    return inner.decode('utf-8'), messages
342
343
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    Warnings about too short headings and the like are silently
    dropped. Use :func:`ReST2HTML_w_warnings` if you are interested
    in them.

    The returned string will be unicode.

    A regular document will be rendered like this:

       >>> source = '''
       ... Headline
       ... ========
       ...
       ... Thanks for watching!
       ... '''
       >>> html = ReST2HTML(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <p>Thanks for watching!</p>
       </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

       >>> source = '''
       ... Headline
       ... ======
       ...
       ... Thanks for watching!
       ... '''
       >>> html = ReST2HTML(source)
       >>> print html
       <div class="document" id="headline">
       <h1 class="title">Headline</h1>
       <BLANKLINE>
       <p>Thanks for watching!</p>
       </div>

    """
    # Only the HTML part of the (html, warnings) result is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
389
390
def attrs_to_fields(cls, omit=[]):
    """Turn the attributes of a class into FieldProperty instances.

    The field names are taken from the first interface implemented by
    `cls`. The (modified) class is returned, so with Python >= 2.6
    this function can even be used as a class decorator.

    `omit` is a list of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.
    """
    schema = list(implementedBy(cls))[0]
    wanted = [name for name in getFieldNames(schema) if name not in omit]
    for name in wanted:
        field = schema[name]
        prop = FieldProperty(field)
        # Set proper docstring for the API docs.
        prop.__doc__ = field.title + ' (computed attribute)'
        setattr(cls, name, prop)
    return cls
408
409
def get_current_principal():
    """Get the 'current' principal.

    This function works without a request. Examining a request is the
    regular (and recommended) way to get the principal involved
    'currently', so use this function only if you really have no
    access to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. if there is no interaction or the interaction
    has no participations.
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present.
        return None
430
431
def cmp_files(file_descr1, file_descr2):
    """Compare two files by their file descriptors.

    Both files are rewound and then read block-wise (`BUFSIZE` bytes
    at a time) until a difference or the end of data is found.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for descriptor in (file_descr1, file_descr2):
        descriptor.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
446
447
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (int or float). Values below one
    kilobyte are given as bytes, kilobytes are rounded down to whole
    numbers, megabytes and gigabytes are given with two decimal
    places.

    Examples:

      >>> string_from_bytes(1)
      u'1 byte(s)'

      >>> string_from_bytes(1025)
      u'1 KB'

      >>> string_from_bytes(1.5 * 1024*1024)
      u'1.50 MB'

      >>> string_from_bytes(673.286 * 1024**3)
      u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Explicit floor division: whole kilobytes for ints and floats.
        return u'%d KB' % (number // 1024,)
    # Divide by floats below. With integer division (Python 2 ints)
    # the fractional part would be lost before formatting.
    if number < 1024 ** 3:
        return u'%.2f MB' % (number / 1024.0 ** 2,)
    return u'%.2f GB' % (number / 1024.0 ** 3,)
473
474
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Objects backed by a real file descriptor are examined with
    :func:`os.fstat`. All other objects, including those that provide
    a ``fileno()`` method which refuses to work (like
    :class:`io.BytesIO`), are measured by seeking to their end.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        return os.fstat(file_like_obj.fileno()).st_size
    except (AttributeError, EnvironmentError, ValueError):
        # No ``fileno`` at all or one that is not backed by a real
        # descriptor: fall back to seeking.
        pass
    file_like_obj.seek(0, os.SEEK_END)  # seek to last position in file
    return file_like_obj.tell()
496
497
def get_user_account(request):
    """Return the local user account of the principal of `request`.

    The account is looked up in the authenticator plugin registered
    under the name ``'users'``.
    """
    user_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(user_id)
505
506
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Base interfaces are searched as well.

    Names of fields that are pure attributes
    (i.e. zope.interface.Attribute) or methods are excluded by
    default.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings, each name contained once.
    """
    # Collect all interfaces (also bases) until nothing new shows up.
    collected = set((iface,))
    while True:
        expanded = set(collected)
        for candidate in collected:
            expanded = expanded.union(set(candidate.getBases()))
        if expanded == collected:
            # No new interfaces found, list complete
            break
        collected = expanded

    # Collect (filtered) names of collected interfaces
    names = []
    for interface in collected:
        for name, descr in interface.namesAndDescriptions():
            if name in omit or name in names:
                continue
            # Exact class check: fields derived from Attribute are kept.
            if exclude_attribs and descr.__class__ is Attribute:
                continue
            if exclude_methods and isinstance(descr, Method):
                continue
            names.append(name)
    return names
546
547
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value, to be filled in preferred order.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    # Drop slots of preferred values that never showed up. Otherwise
    # ``None`` entries would leak into the result.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
589
590
def now(tz=None):
    """Get the current datetime in timezone `tz`.

    If `tz`, a `tzinfo` instance, is ``None``, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
599
600
def to_timezone(dt, tz=None):
    """Shift datetime `dt` into timezone `tz`.

    A 'naive' datetime (one without `tzinfo`) is assumed to be UTC.

    If no `tz` is given, the datetime is shifted to UTC.

    If `dt` is not a datetime.datetime, the input value is returned
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    shifted = aware.tzinfo.normalize(aware).astimezone(target)
    return target.normalize(shifted)
619
620
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    The checks done here are not very thorough, but they make no
    assumptions about the filetype based on the filename extension or
    similar. Instead header data is checked to comply with commonly
    known rules (Magic Words).

    If `bytestream` is not ``None`` the `path` is ignored.

    Returns the filetype as string (something like ``'jpg'``) if the
    file format can be recognized, ``None`` else. ``None`` is also
    returned if neither `path` nor `bytestream` are given.

    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Map `imghdr` type names to the shorter names we use.
    return {'jpeg': 'jpg', 'tiff': 'tif'}.get(detected, detected)
652
653
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    Works as a test/plugin for the stdlib `imghdr` library.

    If `file` is given, its first four bytes are examined instead of
    `bytestream` and the file pointer is reset to the start of the
    file afterwards.

    Returns ``'pdf'`` if the data starts with the PDF marker, ``None``
    otherwise.
    """
    header = bytestream
    if file is not None:
        file.seek(0)
        header = file.read(4)
        file.seek(0)

    if header.startswith('%PDF'):
        return 'pdf'
    return None
667
# Register `check_pdf` as an additional header check function with
# `imghdr`. The membership test ensures the function is not appended
# again if it is already contained in the list of tests.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
671
672
def _first_csv_row(path):
    """Return the first data row of the CSV file in `path` as a dict.

    Returns an empty dict if the file contains no data row.
    """
    with open(path, 'rb') as fd:
        try:
            return next(csv.DictReader(fd))
        except StopIteration:
            return dict()


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.
    If merging fails, the temporary file is removed here and the
    error is raised.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.

    All files opened here are closed before this function returns, so
    the result file is completely written when its path is returned.
    """
    # sniff the col names
    fieldnames = sorted(
        set(_first_csv_row(path1)) | set(_first_csv_row(path2)))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    try:
        with os.fdopen(wp, 'wb') as out:
            writer = csv.DictWriter(out, fieldnames)
            writer.writerow(dict((x, x) for x in fieldnames))  # header
            for path in (path1, path2):
                with open(path, 'rb') as src:
                    for row in csv.DictReader(src):
                        writer.writerow(row)
    except Exception:
        # The caller never learns about the temporary file, so nobody
        # else could clean it up.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
        raise
    return tmp_path
713
714
def product(sequence, start=1):
    """Return the product of a sequence of numbers (_not_ strings).

    The product is multiplied by the parameter `start` (defaults to
    1). If the sequence is empty, 0 is returned.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
726
727
class NullHandler(logging.Handler):
    """A logging handler that swallows every record.

    Nothing is logged at all. Useful if you want to shut up a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        # Intentionally drop the record.
        return None
737
738
def check_csv_charset(iterable):
    """Check contents of `iterable` regarding valid CSV encoding.

    `iterable` is expected to be an iterable on _rows_ (not
    chars). This is true for instance for
    filehandlers. `zope.publisher.browser.FileUpload` instances are
    _not_ iterable, unfortunately.

    Returns line num of first illegal char or ``None``. Line nums
    start counting with 1 (not zero).

    Any other regular error raised while reading is reported as a
    line num as well. Exceptions not derived from :exc:`Exception`
    (like :exc:`KeyboardInterrupt`) are not swallowed.
    """
    linenum = 1
    reader = csv.DictReader(iterable)
    try:
        for row in reader:
            linenum += 1
    except UnicodeDecodeError:
        return linenum
    except Exception:
        # Not a bare ``except``: interrupts and system exits must pass.
        return linenum + 1
    return None

Note: See TracBrowser for help on using the repository browser.

Download in other formats: