Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 9237

Last change on this file since 9237 was 9043, checked in by Henrik Bettermann, 12 years ago
Do not export repeated fields twice.
Property svn:keywords set to `Id`
File size: 21.6 KB

Line
1	## $Id: helpers.py 9043 2012-07-23 21:08:27Z henrik $
2	##
3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""General helper functions for Kofa.
19	"""
20	import csv
21	import datetime
22	import imghdr
23	import os
24	import pytz
25	import re
26	import shutil
27	import tempfile
28	import grok
29	from cStringIO import StringIO
30	from docutils.core import publish_string
31	from zope.component import getUtility
32	from zope.component.interfaces import IFactory
33	from zope.interface import implementedBy
34	from zope.interface.interface import Method, Attribute
35	from zope.schema import getFieldNames
36	from zope.schema.fieldproperty import FieldProperty
37	from zope.security.interfaces import NoInteraction
38	from zope.security.management import getInteraction
39	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
40	from waeup.kofa.interfaces import MessageFactory as _
41
# Number of bytes read per chunk when files are processed block-wise
# (8 KiB); used by `cmp_files` below.
BUFSIZE = 8 * 1024
43
def remove_file_or_directory(filepath):
    """Delete `filepath`, whether it is a file or a directory tree.

    Unlike :func:`shutil.rmtree` this helper silently does nothing
    when the path does not exist, and it also copes with the path
    being a regular file instead of a directory.
    """
    target = os.path.abspath(filepath)
    if os.path.exists(target):
        # Directories are removed recursively, anything else is unlinked.
        remover = shutil.rmtree if os.path.isdir(target) else os.unlink
        remover(target)
    return
59
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the contents of directory `src` into directory `dst`.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised.

    If `overwrite` is ``True``, objects in `dst` that have the same
    name as an object in `src` are removed and replaced. Otherwise
    such objects are left untouched and their names are reported.

    If `del_old` is true, every copied file or directory is removed
    from `src` afterwards.

    Unix hidden files and directories (names starting with ``'.'``)
    are not processed at all.

    Returns the list of names that were not copied.
    """
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
    )
    for is_ok, path, message in preconditions:
        if not is_ok(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            continue  # We do not copy hidden stuff...
        origin = os.path.join(src, name)
        target = os.path.join(dst, name)

        if os.path.exists(target):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target)

        copier = shutil.copytree if os.path.isdir(origin) else shutil.copy2
        copier(origin, target)
        if del_old:
            remove_file_or_directory(origin)
    return skipped
105
106
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # The form (including its tags) wins if present. The tag may carry
    # attributes (``[^>]*``) and the content may span several lines
    # (``re.DOTALL``).
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)

    # No <form> part included: fall back to the contents of <body>.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)

    # No <form> and no <body> tag...
    return html_code
151
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.kofa.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # NOTE: no trailing comma here -- it would turn the title into a
    # one-element tuple instead of a text value.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns a new instance of the class stored in ``factory``.

        Positional and keyword arguments are accepted so the factory
        can be called with arbitrary arguments, but they are not
        passed on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.

        Required by `IFactory`.
        """
        return implementedBy(self.factory)
247
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline in the
    document but can be retrieved from the returned warnings, which
    is a string or ``None``.
    """
    def render(overrides):
        # One docutils run with the given settings overrides.
        return publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)

    stream = StringIO()
    # First pass: report everything, collecting messages in `stream`.
    fulldoc = render({
        'report_level': 0,
        'warning_stream': stream,
        })
    stream.seek(0)
    warning_msgs = stream.read()
    if warning_msgs:
        # Render again, this time with no warnings inline...
        fulldoc = render({
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            })
    else:
        warning_msgs = None

    html = get_inner_HTML_part(fulldoc).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
338
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    Warnings (about too short headings, etc.) are silently dropped.
    Use :func:`ReST2HTML_w_warnings` if you need them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the (html, warnings) pair is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
384
def attrs_to_fields(cls):
    """Replace schema attributes of `cls` by FieldProperty instances.

    The first interface reported by `implementedBy(cls)` is taken as
    the schema; for each of its field names a `FieldProperty` is set
    on the class.

    Returns the (modified) class, so with Python >= 2.6 this function
    can also be used as a class decorator.
    """
    implemented = list(implementedBy(cls))
    schema = implemented[0]
    for name in getFieldNames(schema):
        field_property = FieldProperty(schema[name])
        setattr(cls, name, field_property)
    return cls
394
def get_current_principal():
    """Get the 'current' principal without having a request.

    Examining a request is the regular (and recommended) way to get
    the principal involved 'currently'. Use this function only if you
    really have no access to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. when there is no interaction or the
    interaction has no participations.
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present
        return None
415
def cmp_files(file_descr1, file_descr2):
    """Compare the contents of two open file(-like) objects.

    Both objects are rewound to their start and then read in chunks
    of `BUFSIZE`.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for handle in (file_descr1, file_descr2):
        handle.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files are exhausted and never differed.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
430
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a size in bytes (integer or float). Sizes below one
    kilobyte are given in bytes, sizes below one megabyte in whole
    kilobytes (rounded down), and larger sizes in megabytes or
    gigabytes with two decimal places.

    Examples:

      >>> string_from_bytes(1)
      u'1 byte(s)'

      >>> string_from_bytes(1025)
      u'1 KB'

      >>> string_from_bytes(1.5 * 1024*1024)
      u'1.50 MB'

      >>> string_from_bytes(673.286 * 1024**3)
      u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Whole kilobytes, regardless of the number type and of the
        # division semantics of the running Python version.
        return u'%d KB' % (number // 1024,)
    # Force float division: otherwise integer input would be floor
    # divided on Python 2 and e.g. 1.5 MB would show up as '1.00 MB'.
    size = float(number)
    if number < 1024 ** 3:
        return u'%.2f MB' % (size / 1024 ** 2,)
    return u'%.2f GB' % (size / 1024 ** 3,)
456
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Objects backed by a real file descriptor are examined with
    :func:`os.fstat`. All other objects (including in-memory streams
    that provide a ``fileno`` method which only raises an error) are
    measured by seeking to their end.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # Either there is no `fileno` at all or the object (like
        # `io.BytesIO`) has one that is not backed by a real file.
        pass
    else:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, 2)  # seek to last position in file
    return file_like_obj.tell()
478
def get_user_account(request):
    """Return the local user account of the principal of `request`.

    The account is looked up by the principal's id in the
    authenticator plugin registered under the name ``'users'``.
    """
    principal_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(principal_id)
486
def iface_names(iface, omit=(), exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Searches also base interfaces.

    Names of fields that are pure attributes
    (i.e. zope.interface.Attribute) or methods are excluded by
    default.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list (or any other container) of
    names to exclude.

    Returns an unsorted list of strings; each name appears once.
    """
    if omit is None:
        omit = ()
    # Collect the interface and all its bases (transitively). Every
    # interface is visited exactly once.
    collected = set()
    pending = [iface]
    while pending:
        current = pending.pop()
        if current in collected:
            continue
        collected.add(current)
        pending.extend(current.getBases())
    # Collect (filtered) names of collected interfaces
    result = []
    seen = set()  # names already in `result`, for fast duplicate checks
    for current in collected:
        for name, descr in current.namesAndDescriptions():
            if name in seen or name in omit:
                continue
            # Identity check on the class: schema fields are subclasses
            # of Attribute and must not be excluded here.
            if exclude_attribs and descr.__class__ is Attribute:
                continue
            if exclude_methods and isinstance(descr, Method):
                continue
            seen.add(name)
            result.append(name)
    return result
526
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['x', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value, kept in the order of preference.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    # Drop slots of preferred values that did not show up in the
    # input; otherwise callers would find `None` entries in the result.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
568
def now(tz=None):
    """Return the current datetime, expressed in timezone `tz`.

    `tz` should be a timezone (a `tzinfo` instance) as defined in
    pytz. If it is ``None``, UTC time is returned.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
577
def to_timezone(dt, tz=None):
    """Shift datetime `dt` into timezone `tz`.

    A 'naive' `dt` (one without `tzinfo`) is assumed to be UTC.

    If no `tz` is given, `dt` is shifted to UTC.

    Values that are not a `datetime.datetime` are returned unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source timezone first, then in the target one.
    normalized = aware.tzinfo.normalize(aware)
    return target.normalize(normalized.astimezone(target))
596
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    The checks done here are not very thorough, but they do not rely
    on the filename extension or similar. Instead the header data is
    checked against commonly known rules (Magic Words).

    If `bytestream` is not ``None`` it is examined and passed to
    `imghdr` together with `path`.

    Returns the filetype as string (something like ``'jpg'``) if the
    file format can be recognized, ``None`` else.

    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.

    More filetypes (though untested in waeup.kofa) are recognized
    automatically because the stdlib `imghdr` library is used. See
    that module's docs for a complete list of recognized filetypes.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Use the short names for some of the types reported by imghdr.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
618
619	img_type = None
620	if bytestream is not None:
621	img_type = imghdr.what(path, bytestream)
622	else:
623	img_type = imghdr.what(path)
624	for name, replacement in (('jpeg', 'jpg'), ('tiff', 'tif')):
625	if img_type == name:
626	img_type = replacement
627	return img_type
628
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    Works as a test/plugin for the stdlib `imghdr` library: it takes
    the header data `bytestream` and an open file `file`. If `file` is
    not ``None``, the header is read from the start of the file and
    the file is rewound afterwards.

    Returns ``'pdf'`` if the data starts with the PDF magic word
    ``%PDF``, ``None`` otherwise.
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    if bytestream is None:
        # Neither a file nor header data given: nothing to recognize.
        return None

    header = bytestream[:4]
    # Compare with a magic word of the same string type as the header:
    # on Python 3 byte strings and text strings do not mix.
    if isinstance(header, (bytes, bytearray)):
        magic = b'%PDF'
    else:
        magic = u'%PDF'
    if header == magic:
        return 'pdf'
    return None
642
# Hook `check_pdf` into `imghdr` as an additional header check
# function, unless it is registered already.
if not any(test is check_pdf for test in imghdr.tests):
    imghdr.tests.append(check_pdf)
646
def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.
    If merging fails, the temporary file is removed before the error
    is passed on.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files. The
    columns of the result are the sorted union of the columns found
    in the first data row of each file; missing values are left empty.

    Also broken/empty CSV files can be handled.
    """
    import sys  # local import: only needed to pick csv-friendly file modes

    if sys.version_info[0] < 3:
        # The Python 2 csv module works on byte streams.
        read_mode, write_mode, open_kw = 'rb', 'wb', {}
    else:
        # The Python 3 csv module wants text files with untranslated
        # newlines.
        read_mode, write_mode, open_kw = 'r', 'w', {'newline': ''}

    def first_row_keys(path):
        # Column names as found in the first data row (if any).
        with open(path, read_mode, **open_kw) as source:
            try:
                return list(next(csv.DictReader(source)).keys())
            except StopIteration:
                return []

    # sniff the col names
    fieldnames = sorted(set(first_row_keys(path1) + first_row_keys(path2)))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    try:
        with os.fdopen(wp, write_mode, **open_kw) as target:
            writer = csv.DictWriter(target, fieldnames)
            writer.writerow(dict((x, x) for x in fieldnames))  # header
            for path in (path1, path2):
                with open(path, read_mode, **open_kw) as source:
                    writer.writerows(csv.DictReader(source))
    except Exception:
        # Do not leave a half-written temporary file behind.
        os.unlink(tmp_path)
        raise
    return tmp_path

Note: See TracBrowser for help on using the repository browser.

Download in other formats: