Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 15363

Last change on this file since 15363 was 14939, checked in by Henrik Bettermann, 7 years ago
Do not allow uploading data with trailing whitespaces.
Property svn:keywords set to `Id`
File size: 26.5 KB

Rev	Line
[7196]	1	## $Id: helpers.py 14939 2018-01-18 07:34:42Z henrik $
	2	##
	3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
[7819]	18	"""General helper functions for Kofa.
[4188]	19	"""
[10677]	20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
[8185]	21	import datetime
[8466]	22	import imghdr
[9593]	23	import logging
[4188]	24	import os
[8185]	25	import pytz
[4375]	26	import re
[4188]	27	import shutil
[8631]	28	import tempfile
[5731]	29	import grok
[5848]	30	from cStringIO import StringIO
	31	from docutils.core import publish_string
[7943]	32	from zope.component import getUtility
[5731]	33	from zope.component.interfaces import IFactory
[5734]	34	from zope.interface import implementedBy
[7941]	35	from zope.interface.interface import Method, Attribute
[6071]	36	from zope.schema import getFieldNames
	37	from zope.schema.fieldproperty import FieldProperty
[6372]	38	from zope.security.interfaces import NoInteraction
	39	from zope.security.management import getInteraction
[7175]	40	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
[12231]	41	from zope.formlib.widget import renderElement
[4188]	42
# Chunk size in bytes (8 KiB) used by :func:`cmp_files` when reading
# files piecewise.
BUFSIZE = 8 * 1024
[6372]	44
[10677]	45
def remove_file_or_directory(filepath):
    """Delete `filepath`, be it a regular file or a directory tree.

    In contrast to :func:`shutil.rmtree` a non-existing path is not an
    error (we simply return), and a path that turns out not to be a
    directory is unlinked like a regular file.

    Always returns ``None``.
    """
    target = os.path.abspath(filepath)
    if os.path.exists(target):
        if os.path.isdir(target):
            shutil.rmtree(target)
        else:
            os.unlink(target)
    return
	61
[10677]	62
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the contents of directory `src` into directory `dst`.

    Both directories must exist, otherwise a :exc:`ValueError` is
    raised.

    Only if `overwrite` is ``True``, same-named objects in `dst` are
    removed and replaced. Otherwise they are left untouched and their
    names are collected.

    If `del_old` is true, every item that was copied is removed from
    `src` afterwards.

    Unix hidden files and directories (names starting with ``'.'``)
    are not processed at all.

    Returns the list of names that were not copied because they
    already existed in `dst`.
    """
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
    )
    for predicate, path, message in preconditions:
        if not predicate(path):
            raise ValueError(message % path)

    skipped = []
    # We do not copy hidden stuff...
    visible = [name for name in os.listdir(src) if not name.startswith('.')]
    for name in visible:
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)
        if del_old:
            remove_file_or_directory(source_item)
    return skipped
[4375]	108
	109
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the ``<form>`` tags).

    If there is no form part, try to return the body part contents
    (without the ``<body>`` tags).

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

      >>> doc = '<html><form>My Form</form>Outside the form</html>'
      >>> get_inner_HTML_part(doc)
      '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

      >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
      >>> get_inner_HTML_part(doc)
      'My Body'

    If none of these is included, return what we got:

      >>> doc = '<html>without body nor form</html>'
      >>> get_inner_HTML_part(doc)
      '<html>without body nor form</html>'

    """
    # Both patterns require at least one char before the opening tag
    # (and, for forms, one char after the closing tag). The tags may
    # carry attributes and the content may be empty or span lines.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
	154
[10677]	155
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

      >>> import grok
      >>> from waeup.kofa.utils.helpers import FactoryBase
      >>> class MyObject(object):
      ...   # Some class we want to get instances of.
      ...   pass
      >>> class MyObjectFactory(FactoryBase):
      ...   # This is the factory for MyObject instances
      ...   grok.name(u'waeup.kofa.factory.MyObject')
      ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

      >>> grok.testing.grok('waeup.kofa.utils.helpers')
      >>> grok.testing.grok_component(
      ...    'MyObjectFactory', MyObjectFactory
      ...  )
      True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

      >>> from zope.component import createObject
      >>> obj = createObject('waeup.kofa.factory.MyObject')
      >>> isinstance(obj, MyObject)
      True

    We can also use the regular utility lookups to find our new
    factory:

      >>> from zope.component import getUtility
      >>> from zope.component.interfaces import IFactory
      >>> factory = getUtility(
      ...   IFactory, name='waeup.kofa.factory.MyObject'
      ...   )
      >>> isinstance(factory, MyObjectFactory)
      True

    And this factory generates `MyObject` instances:

      >>> obj = factory()
      >>> isinstance(obj, MyObject)
      True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # NOTE: no trailing comma here -- it would turn the title into a
    # one-element tuple instead of a text string.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Any positional or keyword arguments are accepted but ignored;
        ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
[5848]	251
[10677]	252
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

      >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
      >>> source = '''
      ... Headline
      ... ========
      ...
      ... - A list item
      ... - Another item
      ...
      ... Thanks for watching!
      ... '''
      >>> html, warnings = ReST2HTML_w_warnings(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <ul class="simple">
      <li>A list item</li>
      <li>Another item</li>
      </ul>
      <p>Thanks for watching!</p>
      </div>

    Here no warnings happened, so the `warnings` are ``None``:

      >>> warnings is None
      True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

      >>> source = '''
      ... Headline
      ... ======
      ...
      ... Thanks for watching!
      ... '''
      >>> html, warnings = ReST2HTML_w_warnings(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <p>Thanks for watching!</p>
      </div>

      >>> print warnings
      <string>:3: (WARNING/2) Title underline too short.
      <BLANKLINE>
      Headline
      ======
      <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    # First pass: render with everything reported into our stream.
    fulldoc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides={
            'report_level': 0,
            'warning_stream': warning_stream,
            })
    warning_stream.seek(0)
    warning_msgs = warning_stream.read()
    if not warning_msgs:
        warning_msgs = None
    else:
        # Render again, this time with no warnings inline...
        fulldoc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides={
                'report_level': 10000,
                'halt_level': 10000,
                'warning_stream': warning_stream,
                })
    html = get_inner_HTML_part(fulldoc).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    return html, warning_msgs
[5848]	343
[10677]	344
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings thrown
    away: messages about too short headings, etc. are silently
    discarded. Use :func:`ReST2HTML_w_warnings` if you need them.

    The returned string will be unicode.

    A regular document will be rendered like this:

      >>> source = '''
      ... Headline
      ... ========
      ...
      ... Thanks for watching!
      ... '''
      >>> html = ReST2HTML(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <p>Thanks for watching!</p>
      </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

      >>> source = '''
      ... Headline
      ... ======
      ...
      ... Thanks for watching!
      ... '''
      >>> html = ReST2HTML(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <p>Thanks for watching!</p>
      </div>

    """
    rendered, _ignored_warnings = ReST2HTML_w_warnings(source_string)
    return rendered
[6071]	390
[10677]	391
def attrs_to_fields(cls, omit=[]):
    """Turn the schema fields of a class' interface into FieldProperties.

    For each field name of the first interface implemented by `cls` a
    :class:`FieldProperty` bound to that field is set as class
    attribute of the same name.

    With Python >= 2.6 we can even use this function as a class decorator.

    `omit` is a list of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.

    Returns `cls`.
    """
    iface = list(implementedBy(cls))[0]
    wanted = [name for name in getFieldNames(iface) if name not in omit]
    for name in wanted:
        prop = FieldProperty(iface[name])
        # Set proper docstring for the API docs.
        prop.__doc__ = iface[name].title + ' (computed attribute)'
        setattr(cls, name, prop)
    return cls
[6372]	411
[10677]	412
def get_current_principal():
    """Get the 'current' principal.

    The principal is taken from the first participation of the
    current interaction, so this works without a request. Examining a
    request is the regular (and recommended) way to get a principal
    involved 'currently'; use this function only if you really have
    no access to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. if there is no interaction or the interaction
    has no participations.
    """
    try:
        return getInteraction().participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present
        return None
[6503]	433
[10677]	434
def cmp_files(file_descr1, file_descr2):
    """Compare the contents of two open files.

    Both file objects are rewound to their beginning and then read
    chunk-wise (``BUFSIZE`` at a time) until a difference or the end
    is found.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    file_descr1.seek(0)
    file_descr2.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files exhausted without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
[7078]	449
[10677]	450
def string_from_bytes(number):
    """Turn a number of bytes into some textual representation.

    Values below 1024 are given in bytes, values below 1024**2 in
    (whole, truncated) KB, values below 1024**3 in MB and anything
    else in GB, the latter two with two decimal places.

    Examples:

      >>> string_from_bytes(1)
      u'1 byte(s)'

      >>> string_from_bytes(1025)
      u'1 KB'

      >>> string_from_bytes(1.5 * 1024*1024)
      u'1.50 MB'

      >>> string_from_bytes(673.286 * 1024**3)
      u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    elif number < 1024 ** 2:
        # `%d` truncates, so ints and floats both give whole KBs,
        # regardless of the division semantics of the interpreter.
        return u'%d KB' % (number / 1024,)
    elif number < 1024 ** 3:
        # Divide by a float: with integer division an int input would
        # lose its fractional part before being formatted.
        return u'%.2f MB' % (number / 1024.0 ** 2,)
    return u'%.2f GB' % (number / 1024.0 ** 3,)
[7079]	476
[10677]	477
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Objects providing a working ``fileno()`` are measured via
    :func:`os.fstat`. All others (objects without ``fileno`` or with a
    ``fileno()`` that refuses to work, like the in-memory files of the
    `io` module) are measured by seeking to their end.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # Not backed by a real file descriptor; `io.UnsupportedOperation`
        # is covered by the IOError/OSError/ValueError entries.
        pass
    else:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, 2)  # seek to last position in file
    return file_like_obj.tell()
[7175]	499
[10677]	500
def get_user_account(request):
    """Return local user account.

    The id of the request's principal is looked up in the
    authenticator plugin registered under the name ``users``;
    whatever its ``getAccount`` returns for that id is passed on.
    """
    principal_id = request.principal.id
    users_plugin = getUtility(IAuthenticatorPlugin, name='users')
    return users_plugin.getAccount(principal_id)
[7941]	508
[10677]	509
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Base interfaces are searched as well (recursively).

    Names whose description is a pure attribute (i.e. exactly a
    zope.interface.Attribute) or a method are excluded by default;
    set `exclude_attribs` or `exclude_methods` to false to get them.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings, each name contained once.
    """
    # Collect all interfaces (also bases) recursively
    collected = set()
    pending = [iface]
    while pending:
        candidate = pending.pop()
        if candidate not in collected:
            collected.add(candidate)
            pending.extend(candidate.getBases())
    # Collect (filtered) names of collected interfaces
    names = []
    for candidate in collected:
        for name, descr in candidate.namesAndDescriptions():
            rejected = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                or name in names)
            if not rejected:
                names.append(name)
    return names
[7968]	549
[10677]	550
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply skipped:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['x', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred code, filled when (and if) we meet it.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    # Drop slots of preferred codes that never showed up, so callers
    # never see ``None`` placeholders in the result.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
[8185]	592
[10677]	593
def now(tz=None):
    """Get current datetime in timezone of `tz`.

    The current UTC time is taken and handed to :func:`to_timezone`
    together with `tz`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
	602
[10677]	603
def to_timezone(dt, tz=None):
    """Shift datetime into timezone `tz`.

    A 'naive' datetime `dt` (one without `tzinfo`) is taken to be
    UTC.

    Without a `tz` the datetime is shifted to UTC.

    Anything that is not a datetime.datetime is handed back
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target_tz = pytz.utc if tz is None else tz
    if dt.tzinfo is None:
        dt = pytz.utc.localize(dt)
    source_normalized = dt.tzinfo.normalize(dt)
    shifted = source_normalized.astimezone(target_tz)
    return target_tz.normalize(shifted)
[8466]	622
[10677]	623
def imghdr_test_fpm(h, f):
    """FPM fileformat test.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`.

    `h` is the header data to examine, `f` is not used. Returns
    ``'fpm'`` if the header starts with ``FP1``, ``None`` otherwise.
    """
    # A header shorter than three chars can never equal 'FP1'.
    return 'fpm' if h[:3] == 'FP1' else None
	632
	633
	634	#: Add test function in stdlib's imghdr tests.
	635	imghdr.tests.append(imghdr_test_fpm)
	636
	637
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    The checks are not very thorough, but they do not rely on
    filename extensions or similar. Instead header data is checked
    against commonly known rules (Magic Words).

    If bytestream is not `None` the `path` is ignored.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else. The stdlib names
    ``jpeg`` and ``tiff`` are returned as ``jpg`` and ``tif``.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    # `imghdr.what` only looks at `path` if no header data is given.
    detected = imghdr.what(path, bytestream)
    if detected == 'jpeg':
        return 'jpg'
    if detected == 'tiff':
        return 'tif'
    return detected
	670
[10677]	671
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    If `file` is given, its first four bytes are examined (the file
    is rewound before and after reading) and `bytestream` is
    ignored. Otherwise `bytestream` is examined.

    Returns ``'pdf'`` if the examined data starts with ``%PDF``,
    ``None`` otherwise.

    Works as a test/plugin for the stdlib `imghdr` library.
    """
    header = bytestream
    if file is not None:
        file.seek(0)
        header = file.read(4)
        file.seek(0)

    return 'pdf' if header.startswith('%PDF') else None
	685
# Register check_pdf as header check function with `imghdr`. The
# membership test keeps us from registering it more than once.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
[8631]	689
[10677]	690
def _first_row_keys(path):
    """Return the keys of the first data row of CSV file `path`.

    Returns an empty list if the file contains no data row.
    """
    with open(path, 'rb') as fd:
        try:
            return list(next(csv.DictReader(fd)).keys())
        except StopIteration:
            return []


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`. The columns of the result file are the sorted union
    of the columns found in the first data row of each input file.

    Caution: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.
    """
    # sniff the col names
    fieldnames = sorted(
        set(_first_row_keys(path1)) | set(_first_row_keys(path2)))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    # All files are closed explicitly, so the result is completely
    # written to disk when we return its path.
    with os.fdopen(wp, 'wb') as out:
        writer = csv.DictWriter(out, fieldnames)
        writer.writerow(dict((x, x) for x in fieldnames))  # header
        for path in (path1, path2):
            with open(path, 'rb') as fd:
                for row in csv.DictReader(fd):
                    writer.writerow(row)
    return tmp_path
[9372]	731
[10677]	732
def product(sequence, start=1):
    """Multiply all numbers (_not_ strings) of `sequence`.

    The multiplication starts with `start` (defaults to 1), so the
    result is `start` multiplied by every item of the sequence.

    An empty sequence gives 0, whatever `start` is.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
[9593]	744
[10677]	745
class NullHandler(logging.Handler):
    """A logging handler that drops every record.

    Nothing is logged at all. Useful if you want to shut up a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Discard `record`."""
        return None
[10676]	755
	756
	757	def check_csv_charset(iterable):
[14939]	758	"""Check contents of `iterable` regarding valid CSV encoding and
	759	trailing whitespaces in data.
[10676]	760
	761	`iterable` is expected to be an iterable on _rows_ (not
	762	chars). This is true for instance for
	763	filehandlers. `zope.publisher.browser.FileUpload` instances are
	764	_not_ iterable, unfortunately.
	765
	766	Returns line num of first illegal char or ``None``. Line nums
[14939]	767	start counting with 1 (not zero). Returns -1 if data contain
	768	trailing whitespaces.
[10676]	769	"""
	770	linenum = 1
	771	try:
[13537]	772	reader = csv.DictReader(iterable)
[10676]	773	for row in reader:
	774	linenum += 1
[14939]	775	for value in row.values():
	776	if value.endswith(' '):
	777	return -1
[10676]	778	except UnicodeDecodeError:
	779	return linenum
	780	except:
	781	return linenum + 1
	782	return None
[11824]	783
	784
	785	class MemInfo(dict):
	786	"""A dict with access to its items like if they are attributes.
	787	"""
	788	__getattr__ = dict.__getitem__
	789	__setattr__ = dict.__setitem__
	790	__delattr__ = dict.__delitem__
	791
	792
	793	def get_meminfo(src="/proc/meminfo"):
	794	"""Get local memory info as provided in /proc/meminfo.
	795
	796	Entries in /proc/meminfo are available as MemInfo attributes.
	797
	798	By default we lookup a file /proc/meminfo. Another path can be
	799	lines = open(src, 'r').read()passed in as `src` parameter. In this
	800	case `src` must be a regular file and contain meminfo-style data.
	801
	802	If the given `src` (or `/proc/meminfo`) are not available, `None`
	803	lines = open(src, 'r').read()is returned.
	804	"""
	805	if not os.path.isfile(src):
	806	return None
	807	lines = open(src, 'r').read().splitlines()
	808	result = MemInfo()
	809	for line in lines:
	810	key, value = line.split(':', 1)
	811	value = int(value.split(' kB', 1)[0])
	812	result[key] = value
	813	return result
[12231]	814
	815	def html2dict(value=None,portal_language='en'):
	816	"""Transforms a localized HTML text string into a dictionary.
	817
[13077]	818	Different languages must be separated by ``>>xy<<`` whereas
[12231]	819	xy is the language code. Text parts without correct leading
	820	language separator - usually the first part has no language
	821	descriptor - are interpreted as texts in the portal's language.
	822	"""
	823	try:
	824	parts = value.split('>>')
	825	except:
	826	return {}
	827	elements = {}
	828	lang = portal_language
	829	for part in parts:
	830	if part[2:4] == u'<<':
[12393]	831	lang = str(part[0:2].lower())
[12231]	832	text = part[4:]
	833	elements[lang] = renderElement(u'div id="html"',
	834	contents=text)
	835	else:
	836	text = part
	837	elements[lang] = renderElement(u'div id="html"',
	838	contents=text)
[12433]	839	return elements
	840
	841	def rest2dict(value=None,portal_language='en'):
	842	"""Transforms a localized REST text string into a dictionary.
	843
[13077]	844	Different languages must be separated by ``>>xy<<``` whereas
[12433]	845	xy is the language code. Text parts without correct leading
	846	language separator - usually the first part has no language
	847	descriptor - are interpreted as texts in the portal's language.
	848	"""
	849	try:
	850	parts = value.split('>>')
	851	except:
	852	return {}
	853	elements = {}
	854	lang = portal_language
	855	for part in parts:
	856	if part[2:4] == u'<<':
	857	lang = str(part[0:2].lower())
	858	text = part[4:]
	859	elements[lang] = renderElement(u'div id="rest"',
	860	contents=ReST2HTML(text))
	861	else:
	862	text = part
	863	elements[lang] = renderElement(u'div id="rest"',
	864	contents=ReST2HTML(text))
[12231]	865	return elements

Note: See TracBrowser for help on using the repository browser.

Download in other formats: