Context navigation

helpers.py @ 17497

Last change on this file since 17497 was 15258, checked in by Henrik Bettermann, 6 years ago
Do not allow uploading data with trailing whitespaces.
Property svn:keywords set to `Id`
File size: 26.7 KB

Rev	Line
[7196]	1	## $Id: helpers.py 15258 2018-11-30 07:34:44Z henrik $
	2	##
	3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
[11949]	18	"""General helper functions for Ikoba.
[4188]	19	"""
[10677]	20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
[8185]	21	import datetime
[8466]	22	import imghdr
[9593]	23	import logging
[4188]	24	import os
[8185]	25	import pytz
[4375]	26	import re
[4188]	27	import shutil
[8631]	28	import tempfile
[5731]	29	import grok
[5848]	30	from cStringIO import StringIO
	31	from docutils.core import publish_string
[12634]	32	from zope.i18n import translate
[7943]	33	from zope.component import getUtility
[5731]	34	from zope.component.interfaces import IFactory
[5734]	35	from zope.interface import implementedBy
[7941]	36	from zope.interface.interface import Method, Attribute
[6071]	37	from zope.schema import getFieldNames
	38	from zope.schema.fieldproperty import FieldProperty
[6372]	39	from zope.security.interfaces import NoInteraction
	40	from zope.security.management import getInteraction
[7175]	41	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
[12229]	42	from zope.formlib.widget import renderElement
[12634]	43	from waeup.ikoba.interfaces import MessageFactory as _
[4188]	44
#: Chunk size in bytes for blockwise file reads (see `cmp_files`).
BUFSIZE = 8 * 1024
[6372]	46
[10677]	47
def remove_file_or_directory(filepath):
    """Remove a file or directory.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves; whatever they point to is
    left untouched. This includes dangling links.

    Always returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    if os.path.islink(filepath):
        # Links need special care: `os.path.exists` reports dangling
        # links as missing and `shutil.rmtree` refuses to operate on
        # links to directories. So we unlink them before other checks.
        os.unlink(filepath)
        return
    if not os.path.exists(filepath):
        return
    if os.path.isdir(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
	63
[10677]	64
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the contents of directory `src` into directory `dst`.

    Both paths must exist and must be directories, otherwise a
    `ValueError` is raised.

    Items already present in `dst` under the same name are replaced
    only if `overwrite` is ``True``; otherwise they are left alone and
    their names are collected.

    If `del_old` is true, each item is removed from `src` after it
    was copied.

    Unix hidden files and directories (names starting with '.') are
    not processed at all.

    Returns the list of names that were not copied.
    """
    requirements = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
        )
    for predicate, path, message in requirements:
        if not predicate(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            # We do not copy hidden stuff...
            continue
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)
        if del_old:
            remove_file_or_directory(source_item)
    return skipped
[4375]	110
	111
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # `[^>]*` accepts tags with or without attributes, `.*` accepts
    # any (also empty) content; DOTALL lets `.` span line breaks.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included, look for a <body>.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
	156
[10677]	157
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.ikoba` we use factories extensively for
    batching. While processing a batch some processors looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.ikoba.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.ikoba.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.ikoba.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.ikoba`
    system.

       >>> grok.testing.grok('waeup.ikoba.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.ikoba.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.ikoba.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # No trailing comma here: it would turn the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new e.g. applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted but ignored;
        ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``."""
        # Required by IFactory
        return implementedBy(self.factory)
[5848]	253
[10677]	254
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.ikoba.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()
    # First pass: report everything, collecting messages in `stream`.
    fulldoc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides={
            'report_level': 0,
            'warning_stream': stream,
            })
    stream.seek(0)
    warning_msgs = stream.read() or None
    if warning_msgs is not None:
        # Render again, this time with no warnings inline...
        quiet_settings = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            }
        fulldoc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=quiet_settings)
    html = get_inner_HTML_part(fulldoc).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
[5848]	345
[10677]	346
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    Warnings (about too short headings, etc.) are silently dropped.
    Use :func:`ReST2HTML_w_warnings` if you are interested in them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the (html, warnings) pair is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
[6071]	392
[10677]	393
def attrs_to_fields(cls, omit=[]):
    """Install a FieldProperty on `cls` for each schema field.

    The fields are taken from the first interface implemented by
    `cls`. Field names listed in `omit` are skipped, which is useful
    for names that are implemented as properties and the like.

    Returns `cls`, so with Python >= 2.6 this function can also be
    used as a class decorator.
    """
    iface = list(implementedBy(cls))[0]
    wanted = [name for name in getFieldNames(iface) if name not in omit]
    for name in wanted:
        field = iface[name]
        prop = FieldProperty(field)
        # Set proper docstring for the API docs.
        prop.__doc__ = field.title + ' (computed attribute)'
        setattr(cls, name, prop)
    return cls
[6372]	411
[10677]	412
def get_current_principal():
    """Get the 'current' principal.

    This function works without a request. Examining a request is the
    regular (and recommended) way to find the principal involved, so
    use this function only if you really have no access to the
    current request.

    Returns the principal of the first participation of the current
    interaction, or ``None`` if there is no interaction or it has no
    participations (for instance during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present.
        return None
[6503]	433
[10677]	434
def cmp_files(file_descr1, file_descr2):
    """Compare the contents of two open file-like objects.

    Both objects are rewound to their start and then read in chunks
    of `BUFSIZE`.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    while True:
        left = file_descr1.read(BUFSIZE)
        right = file_descr2.read(BUFSIZE)
        if left == right:
            if not left:
                # Both streams exhausted without any difference.
                return True
            continue
        return False
[7078]	449
[10677]	450
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (integer or float). Values below
    1024 are rendered as bytes, larger ones as whole KB or as MB/GB
    with two decimal places.

    Examples:

      >>> string_from_bytes(1)
      u'1 byte(s)'

      >>> string_from_bytes(1025)
      u'1 KB'

      >>> string_from_bytes(1.5 * 1024*1024)
      u'1.50 MB'

      >>> string_from_bytes(1536 * 1024)
      u'1.50 MB'

      >>> string_from_bytes(673.286 * 1024**3)
      u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Whole kilobytes only, independent of the division model.
        return u'%d KB' % (number // 1024,)
    # Divide by floats: with integer input a plain `/` would truncate
    # under Python 2 and always yield '.00' fractions.
    if number < 1024 ** 3:
        return u'%.2f MB' % (number / 1024.0 ** 2,)
    return u'%.2f GB' % (number / 1024.0 ** 3,)
[7079]	476
[10677]	477
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        fileno = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # Not backed by a real file descriptor. In-memory streams from
        # the `io` module do have a `fileno` method, but it raises.
        fileno = None
    if fileno is not None:
        return os.fstat(fileno).st_size
    file_like_obj.seek(0, 2)  # seek to last position in file
    return file_like_obj.tell()
[7175]	499
[10677]	500
def get_user_account(request):
    """Return local user account.

    The account is looked up by the id of the principal of `request`
    in the authenticator plugin registered under the name ``users``.
    """
    principal_id = request.principal.id
    return getUtility(
        IAuthenticatorPlugin, name='users').getAccount(principal_id)
[7941]	508
[10677]	509
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Base interfaces are searched as well.

    Names of pure attributes (i.e. zope.interface.Attribute) and of
    methods are excluded by default; see `exclude_attribs` and
    `exclude_methods`.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings, each name listed once.
    """
    # Collect all interfaces (also bases): extend the set by the bases
    # of its members until a pass does not add anything new.
    known = set((iface,))
    while True:
        snapshot = set(known)
        grown = snapshot
        for candidate in snapshot:
            grown = set.union(grown, set(candidate.getBases()))
        if grown == known:
            # No new interfaces found, list complete
            break
        known = grown
    # Collect (filtered) names of collected interfaces
    names = []
    for candidate in known:
        for name, descr in candidate.namesAndDescriptions():
            skip = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                or name in names
                )
            if not skip:
                names.append(name)
    return names
[7968]	549
[10677]	550
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred tokens that do not occur in the input are ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred token, in order of preference.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    # Slots never filled belong to tokens missing in the input; drop
    # them instead of handing out `None` entries.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
[8185]	592
[10677]	593
def now(tz=None):
    """Get current datetime in timezone of `tz`.

    `tz` should be a timezone as defined in pytz. If it is ``None``,
    UTC time is returned.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
	602
[10677]	603
def to_timezone(dt, tz=None):
    """Shift datetime into timezone `tz`.

    A datetime `dt` without `tzinfo` (i.e. a 'naive' one) is assumed
    to be UTC.

    If no `tz` is given, the datetime is shifted to UTC.

    Values of `dt` that are not datetime.datetime instances are
    returned unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    shifted = aware.tzinfo.normalize(aware).astimezone(target)
    return target.normalize(shifted)
[8466]	622
[10677]	623
def imghdr_test_fpm(h, f):
    """FPM fileformat test.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`.

    `h` is the header data to examine, `f` is not used. The signature
    is the one of test functions of the stdlib `imghdr` module.

    Returns ``'fpm'`` if `h` starts with the magic word ``FP1``,
    ``None`` otherwise.
    """
    # Compare like with like: a byte string never equals a text
    # string under Python 3.
    magic = b'FP1' if isinstance(h, bytes) else u'FP1'
    if h[:3] == magic:
        return 'fpm'
    return None
	632
	633
	634	#: Add test function in stdlib's imghdr tests.
	635	imghdr.tests.append(imghdr_test_fpm)
	636
	637
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    The checks done here are not very thorough, but they do not rely
    on filename extensions or similar. Instead header data is checked
    to comply with common known rules (Magic Words).

    If `bytestream` is not ``None``, the `path` is ignored.

    Returns the filetype as string (something like ``'jpg'``) if the
    file format can be recognized, ``None`` else. Also ``None`` is
    returned if neither `path` nor `bytestream` are given.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.ikoba) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None
    # Passing ``None`` as header equals passing no header at all.
    detected = imghdr.what(path, bytestream)
    # Use the short names for types imghdr reports in long form.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
	670
[10677]	671
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    Works as a test/plugin for the stdlib `imghdr` library.

    If `file` is not ``None``, the header is read from it and
    `bytestream` is ignored. `file` is positioned at its start
    afterwards.

    Returns ``'pdf'`` if the data starts with ``%PDF``, ``None``
    otherwise (also if there is no data to examine).
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    if bytestream is None:
        # Neither a file nor header data was passed in.
        return None
    # Compare like with like: `startswith` refuses to mix byte and
    # text strings under Python 3.
    magic = b'%PDF' if isinstance(bytestream, bytes) else u'%PDF'
    if bytestream.startswith(magic):
        return 'pdf'
    return None
	685
# Register `check_pdf` as header check function with `imghdr`. The
# membership test keeps it from being registered more than once.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
[8631]	689
[10677]	690
def _first_row_keys(path):
    """Get the column names found in the first data row of a CSV file.

    Returns an empty list if the file contains no data row.
    """
    with open(path, 'rb') as fd:
        try:
            return list(next(csv.DictReader(fd)).keys())
        except StopIteration:
            return []


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    **Caution**: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.

    All files opened here are closed again before returning.
    """
    # sniff the col names
    fieldnames = sorted(
        set(_first_row_keys(path1)) | set(_first_row_keys(path2)))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    with os.fdopen(wp, 'wb') as out:
        writer = csv.DictWriter(out, fieldnames)
        writer.writerow(dict((x, x) for x in fieldnames))  # header
        for path in (path1, path2):
            with open(path, 'rb') as src:
                for row in csv.DictReader(src):
                    writer.writerow(row)
    return tmp_path
[9372]	731
[10677]	732
def product(sequence, start=1):
    """Multiply all items of `sequence`, beginning with `start`.

    `sequence` must contain numbers (_not_ strings); `start` defaults
    to 1. For an empty sequence 0 is returned, regardless of `start`.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
[9593]	744
[10677]	745
class NullHandler(logging.Handler):
    """A logging handler that drops every record.

    Useful if you want to shut up a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Discard `record` without doing anything."""
        return None
[10676]	755
	756
	757	def check_csv_charset(iterable):
[15258]	758	"""Check contents of `iterable` regarding valid CSV encoding and
	759	trailing whitespaces in data.
[10676]	760
	761	`iterable` is expected to be an iterable on _rows_ (not
	762	chars). This is true for instance for
	763	filehandlers. `zope.publisher.browser.FileUpload` instances are
	764	_not_ iterable, unfortunately.
	765
	766	Returns line num of first illegal char or ``None``. Line nums
[15258]	767	start counting with 1 (not zero). Returns -1 if data contain
	768	trailing whitespaces.
[10676]	769	"""
	770	linenum = 1
	771	try:
[15258]	772	reader = csv.DictReader(iterable)
[10676]	773	for row in reader:
	774	linenum += 1
[15258]	775	for value in row.values():
	776	if value.endswith(' '):
	777	return -1
[10676]	778	except UnicodeDecodeError:
	779	return linenum
	780	except:
	781	return linenum + 1
	782	return None
[11824]	783
	784
	785	class MemInfo(dict):
	786	"""A dict with access to its items like if they are attributes.
	787	"""
	788	__getattr__ = dict.__getitem__
	789	__setattr__ = dict.__setitem__
	790	__delattr__ = dict.__delitem__
	791
	792
	793	def get_meminfo(src="/proc/meminfo"):
	794	"""Get local memory info as provided in /proc/meminfo.
	795
	796	Entries in /proc/meminfo are available as MemInfo attributes.
	797
	798	By default we lookup a file /proc/meminfo. Another path can be
	799	lines = open(src, 'r').read()passed in as `src` parameter. In this
	800	case `src` must be a regular file and contain meminfo-style data.
	801
	802	If the given `src` (or `/proc/meminfo`) are not available, `None`
	803	lines = open(src, 'r').read()is returned.
	804	"""
	805	if not os.path.isfile(src):
	806	return None
	807	lines = open(src, 'r').read().splitlines()
	808	result = MemInfo()
	809	for line in lines:
	810	key, value = line.split(':', 1)
	811	value = int(value.split(' kB', 1)[0])
	812	result[key] = value
	813	return result
[12229]	814
	815	def html2dict(value=None,portal_language='en'):
	816	"""Transforms a localized HTML text string into a dictionary.
	817
[13135]	818	Different languages must be separated by ``>>xy<<`` whereas
[12229]	819	xy is the language code. Text parts without correct leading
	820	language separator - usually the first part has no language
	821	descriptor - are interpreted as texts in the portal's language.
	822	"""
	823	try:
	824	parts = value.split('>>')
	825	except:
	826	return {}
	827	elements = {}
	828	lang = portal_language
	829	for part in parts:
	830	if part[2:4] == u'<<':
[12361]	831	lang = str(part[0:2].lower())
[12229]	832	text = part[4:]
	833	elements[lang] = renderElement(u'div id="html"',
	834	contents=text)
	835	else:
	836	text = part
	837	elements[lang] = renderElement(u'div id="html"',
	838	contents=text)
	839	return elements
[12408]	840
	841	def rest2dict(value=None,portal_language='en'):
	842	"""Transforms a localized REST text string into a dictionary.
	843
[13135]	844	Different languages must be separated by ``>>xy<<`` whereas
[12408]	845	xy is the language code. Text parts without correct leading
	846	language separator - usually the first part has no language
	847	descriptor - are interpreted as texts in the portal's language.
	848	"""
	849	try:
	850	parts = value.split('>>')
	851	except:
	852	return {}
	853	elements = {}
	854	lang = portal_language
	855	for part in parts:
	856	if part[2:4] == u'<<':
	857	lang = str(part[0:2].lower())
	858	text = part[4:]
	859	elements[lang] = renderElement(u'div id="rest"',
	860	contents=ReST2HTML(text))
	861	else:
	862	text = part
	863	elements[lang] = renderElement(u'div id="rest"',
	864	contents=ReST2HTML(text))
	865	return elements
[12634]	866
	867	def format_date(dateobj, request):
	868	if isinstance(dateobj, datetime.date):
	869	return dateobj.strftime("%d/%m/%Y")
	870	else:
	871	return translate(_('indefinite'), context=request)

Note: See TracBrowser for help on using the repository browser.

Context navigation

source: main/waeup.ikoba/trunk/src/waeup/ikoba/utils/helpers.py @ 17497

Download in other formats: