Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 11820

Last change on this file since 11820 was 11660, checked in by uli, 11 years ago
Add fileformat checker for fpm files.
Property svn:keywords set to `Id`
File size: 23.5 KB

Rev	Line
[7196]	1	## $Id: helpers.py 11660 2014-05-19 17:30:44Z uli $
	2	##
	3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
[7819]	18	"""General helper functions for Kofa.
[4188]	19	"""
[10677]	20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
[8185]	21	import datetime
[8466]	22	import imghdr
[9593]	23	import logging
[4188]	24	import os
[8185]	25	import pytz
[4375]	26	import re
[4188]	27	import shutil
[8631]	28	import tempfile
[5731]	29	import grok
[5848]	30	from cStringIO import StringIO
	31	from docutils.core import publish_string
[7943]	32	from zope.component import getUtility
[5731]	33	from zope.component.interfaces import IFactory
[5734]	34	from zope.interface import implementedBy
[7941]	35	from zope.interface.interface import Method, Attribute
[6071]	36	from zope.schema import getFieldNames
	37	from zope.schema.fieldproperty import FieldProperty
[6372]	38	from zope.security.interfaces import NoInteraction
	39	from zope.security.management import getInteraction
[7175]	40	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
[4188]	41
#: Number of bytes read per chunk when file contents are compared
#: (see `cmp_files`).
BUFSIZE = 8 * 1024
[6372]	43
[10677]	44
def remove_file_or_directory(filepath):
    """Delete `filepath`, be it a regular file or a directory tree.

    In contrast to :func:`shutil.rmtree` a non-existing path is not an
    error (we return silently then), and a path that turns out to be a
    regular file instead of a directory is removed as well.

    Always returns ``None``.
    """
    target = os.path.abspath(filepath)
    if os.path.isdir(target):
        # Directories are removed together with everything below them.
        shutil.rmtree(target)
    elif os.path.exists(target):
        os.unlink(target)
	60
[10677]	61
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory `src` to directory `dst`.

    Both directories must exist and must be directories, otherwise a
    :exc:`ValueError` is raised.

    If `overwrite` is true (any truthy value), same named objects in
    `dst` are removed and replaced. Otherwise these objects are not
    touched and their names are reported in the result.

    If `del_old` is true, copied files and directories will be removed
    from the `src` directory. Items that were not copied stay in `src`.

    This function returns a list of names of non-copied items.

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    # Check existence of both paths first, then their type, so that
    # the most basic problem is reported first.
    for label, path in (('source', src), ('destination', dst)):
        if not os.path.exists(path):
            raise ValueError('%s path does not exist: %s' % (label, path))
    for label, path in (('source', src), ('destination', dst)):
        if not os.path.isdir(path):
            raise ValueError(
                '%s path is not a directory: %s' % (label, path))

    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue  # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            if not overwrite:
                not_copied.append(item)
                continue
            # Make room for the new copy.
            remove_file_or_directory(itemdst)

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            remove_file_or_directory(itemsrc)
    return not_copied
[4375]	107
	108
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the ``<form>`` tags).

    If there is no form part, try to return the body part contents
    (without the ``<body>`` tags).

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

      >>> doc = '<html><form>My Form</form>Outside the form</html>'
      >>> get_inner_HTML_part(doc)
      '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

      >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
      >>> get_inner_HTML_part(doc)
      'My Body'

    If none of these is included, return what we got:

      >>> doc = '<html>without body nor form</html>'
      >>> get_inner_HTML_part(doc)
      '<html>without body nor form</html>'

    """
    # The opening tags may carry attributes (``[^>]*``) and the
    # enclosed content may be of any length and span several lines
    # (``.*`` with DOTALL).
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
	153
[10677]	154
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

      >>> import grok
      >>> from waeup.kofa.utils.helpers import FactoryBase
      >>> class MyObject(object):
      ...   # Some class we want to get instances of.
      ...   pass
      >>> class MyObjectFactory(FactoryBase):
      ...   # This is the factory for MyObject instances
      ...   grok.name(u'waeup.kofa.factory.MyObject')
      ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

      >>> grok.testing.grok('waeup.kofa.utils.helpers')
      >>> grok.testing.grok_component(
      ...    'MyObjectFactory', MyObjectFactory
      ...  )
      True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

      >>> from zope.component import createObject
      >>> obj = createObject('waeup.kofa.factory.MyObject')
      >>> isinstance(obj, MyObject)
      True

    We can also use the regular utility lookups to find our new
    factory:

      >>> from zope.component import getUtility
      >>> from zope.component.interfaces import IFactory
      >>> factory = getUtility(
      ...   IFactory, name='waeup.kofa.factory.MyObject'
      ...   )
      >>> isinstance(factory, MyObjectFactory)
      True

    And this factory generates `MyObject` instances:

      >>> obj = factory()
      >>> isinstance(obj, MyObject)
      True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # No trailing comma here: `title` must be a string, not a tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    # The class to instantiate; must be set by derived classes.
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted, so that the
        factory can be called without any arguments, but they are
        ignored: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
[5848]	250
[10677]	251
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText as HTML and report warnings separately.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code (unicode) generated from the source string,
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

      >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
      >>> source = '''
      ... Headline
      ... ========
      ...
      ... - A list item
      ... - Another item
      ...
      ... Thanks for watching!
      ... '''
      >>> html, warnings = ReST2HTML_w_warnings(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <ul class="simple">
      <li>A list item</li>
      <li>Another item</li>
      </ul>
      <p>Thanks for watching!</p>
      </div>

    Here no warnings happened, so the `warnings` are ``None``:

      >>> warnings is None
      True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

      >>> source = '''
      ... Headline
      ... ======
      ...
      ... Thanks for watching!
      ... '''
      >>> html, warnings = ReST2HTML_w_warnings(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <p>Thanks for watching!</p>
      </div>

      >>> print warnings
      <string>:3: (WARNING/2) Title underline too short.
      <BLANKLINE>
      Headline
      ======
      <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()
    # First pass: report everything into `stream`.
    html_doc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=dict(report_level=0, warning_stream=stream))
    messages = stream.getvalue()
    if messages:
        # Second pass: render again, this time with no warnings inline...
        html_doc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=dict(
                report_level=10000,
                halt_level=10000,
                warning_stream=stream))
    else:
        messages = None
    html = get_inner_HTML_part(html_doc).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    return html, messages
[5848]	342
[10677]	343
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into (unicode) HTML.

    Warnings about too short headings, etc. are dropped silently.
    Use :func:`ReST2HTML_w_warnings` if you need them.

    A regular document will be rendered like this:

      >>> source = '''
      ... Headline
      ... ========
      ...
      ... Thanks for watching!
      ... '''
      >>> html = ReST2HTML(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <p>Thanks for watching!</p>
      </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

      >>> source = '''
      ... Headline
      ... ======
      ...
      ... Thanks for watching!
      ... '''
      >>> html = ReST2HTML(source)
      >>> print html
      <div class="document" id="headline">
      <h1 class="title">Headline</h1>
      <BLANKLINE>
      <p>Thanks for watching!</p>
      </div>

    """
    # Only the HTML part of the (html, warnings) tuple is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
[6071]	389
[10677]	390
def attrs_to_fields(cls, omit=[]):
    """Replace schema attributes of `cls` by FieldProperty instances.

    The schema is the first interface reported by ``implementedBy``
    for `cls`. With Python >= 2.6 this function can also be used as a
    class decorator.

    `omit` is a list of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.

    Returns `cls`.
    """
    schema = list(implementedBy(cls))[0]
    for name in getFieldNames(schema):
        if name not in omit:
            prop = FieldProperty(schema[name])
            # Set proper docstring for the API docs.
            prop.__doc__ = schema[name].title + ' (computed attribute)'
            setattr(cls, name, prop)
    return cls
[6372]	408
[10677]	409
def get_current_principal():
    """Return the principal of the current interaction, if any.

    This works without a request. Examining a request is the regular
    (and recommended) way to get the principal involved 'currently';
    use this function only if you really have no access to the
    current request.

    Returns ``None`` when no principal is involved (for instance
    during tests).
    """
    try:
        return getInteraction().participations[0].principal
    except (NoInteraction, IndexError):
        # Either no interaction at all or no participations present.
        return None
[6503]	430
[10677]	431
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have the same contents.

    Both files are rewound and then read in chunks of ``BUFSIZE``.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    while True:
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
        if chunk1 == chunk2:
            if not chunk1:
                # Both files ended at the same time: no difference found.
                return True
            continue
        return False
[7078]	446
[10677]	447
def string_from_bytes(number):
    """Turn a number into some textual representation.

    `number` is a number of bytes (integer or float). Values below
    1024 are given in bytes, values below 1024**2 in whole (truncated)
    KB and larger values in MB or GB with two decimal places.

    Examples:

      >>> string_from_bytes(1)
      u'1 byte(s)'

      >>> string_from_bytes(1025)
      u'1 KB'

      >>> string_from_bytes(1.5 * 1024*1024)
      u'1.50 MB'

      >>> string_from_bytes(673.286 * 1024**3)
      u'673.29 GB'

    Integer input is handled like float input:

      >>> string_from_bytes(1572864)
      u'1.50 MB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Explicit floor division: whole KB regardless of the
        # interpreter's division semantics.
        return u'%d KB' % (number // 1024,)
    # Explicit float division: integer input must not lose its
    # fractional part.
    if number < 1024 ** 3:
        return u'%.2f MB' % (float(number) / 1024 ** 2,)
    return u'%.2f GB' % (float(number) / 1024 ** 3,)
[7079]	473
[10677]	474
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    File-like objects that provide a ``fileno`` method which does not
    deliver a usable file descriptor (like :class:`io.BytesIO`) are
    measured by seeking to their end.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, EnvironmentError, ValueError):
        # No `fileno` at all, or an in-memory stream refusing to
        # deliver one (io.UnsupportedOperation).
        descriptor = None
    if descriptor is not None:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, os.SEEK_END)  # seek to last position in file
    return file_like_obj.tell()
[7175]	496
[10677]	497
def get_user_account(request):
    """Return the local user account of the principal of `request`.

    The account is looked up in the authenticator plugin registered
    under the name ``users``.
    """
    user_id = request.principal.id
    return getUtility(IAuthenticatorPlugin, name='users').getAccount(user_id)
[7941]	505
[10677]	506
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Collect the attribute names of an interface and all its bases.

    Names of fields that are pure attributes
    (i.e. zope.interface.Attribute) or methods are excluded by
    default.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings without duplicates.
    """
    # Collect all interfaces (also bases) until nothing new shows up.
    collected = set((iface,))
    while True:
        expanded = set(collected)
        for known in collected:
            expanded.update(known.getBases())
        if expanded == collected:
            break
        collected = expanded

    # Collect (filtered) names of collected interfaces
    names = []
    for candidate in collected:
        for name, descr in candidate.namesAndDescriptions():
            skip = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                or name in names)
            if not skip:
                names.append(name)
    return names
[7968]	546
[10677]	547
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value, filled when the value shows up.
    head = [None] * len(preferred)
    tail = []
    for title, code in tuples_iterable:
        if code in preferred:
            head[preferred.index(code)] = (title, code)
        else:
            tail.append((title, code))
    # Drop slots of preferred values that never showed up.
    found = [entry for entry in head if entry is not None]
    return tuple(found + tail)
[8185]	589
[10677]	590
def now(tz=None):
    """Return the current datetime, expressed in timezone `tz`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
	599
[10677]	600
def to_timezone(dt, tz=None):
    """Express datetime `dt` in timezone `tz`.

    A `dt` without `tzinfo` (i.e. a 'naive' one) is assumed to be UTC.

    If no `tz` is given, shift to UTC is performed.

    If `dt` is not a datetime.datetime, the input value is returned
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source timezone first, then in the target one.
    shifted = aware.tzinfo.normalize(aware).astimezone(target)
    return target.normalize(shifted)
[8466]	619
[10677]	620
def imghdr_test_fpm(h, f):
    """Header test for the FPM fileformat.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`. Such data starts with the magic word ``FP1``.

    Returns ``'fpm'`` if header `h` starts with the magic word,
    ``None`` otherwise. `f` is not used.
    """
    # A header shorter than the magic word yields a shorter slice and
    # therefore never compares equal.
    return 'fpm' if h[:3] == 'FP1' else None
	629
	630
	631	#: Add test function in stdlib's imghdr tests.
	632	imghdr.tests.append(imghdr_test_fpm)
	633
	634
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    The checks done here are not very thorough, but they do not rely
    on filename extensions or similar. Instead header data is checked
    for compliance with common known rules (Magic Words).

    If bytestream is not `None` the `path` is ignored.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Use the short names for some of the types reported by imghdr.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
	667
[10677]	668
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    Works as a test/plugin for the stdlib `imghdr` library.

    If `file` is given, its first four bytes are examined instead of
    `bytestream` and the file is rewound afterwards.

    Returns ``'pdf'`` if the data starts with ``%PDF``, ``None``
    otherwise.
    """
    header = bytestream
    if file is not None:
        file.seek(0)
        header = file.read(4)
        file.seek(0)
    return 'pdf' if header.startswith('%PDF') else None
	682
# Register check_pdf as header check function with `imghdr`. The
# membership test avoids registering the function more than once.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
[8631]	686
[10677]	687
def _first_csv_row(path):
    """Return the first data row of CSV file `path` as dict (or an
    empty dict, if there is no such row)."""
    with open(path, 'rb') as fd:
        try:
            return next(csv.DictReader(fd))
        except StopIteration:
            return dict()


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    Caution: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.

    All files opened here are closed again before returning, so the
    result file is completely written when its path is handed out.
    """
    # sniff the col names
    row10 = _first_csv_row(path1)
    row20 = _first_csv_row(path2)
    fieldnames = sorted(set(row10) | set(row20))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    with os.fdopen(wp, 'wb') as out:
        writer = csv.DictWriter(out, fieldnames)
        writer.writerow(dict((x, x) for x in fieldnames))  # header
        for path in (path1, path2):
            with open(path, 'rb') as src:
                for row in csv.DictReader(src):
                    writer.writerow(row)
    return tmp_path
[9372]	728
[10677]	729
def product(sequence, start=1):
    """Multiply all numbers (_not_ strings) of `sequence` and the
    parameter `start` (defaults to 1) and return the result.

    For an empty sequence 0 is returned, regardless of `start`.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
[9593]	741
[10677]	742
class NullHandler(logging.Handler):
    """A logging handler that swallows every record.

    Does not log anything. Useful if you want to shut up a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Ignore `record`."""
        return None
[10676]	752
	753
	754	def check_csv_charset(iterable):
	755	"""Check contents of `iterable` regarding valid CSV encoding.
	756
	757	`iterable` is expected to be an iterable on _rows_ (not
	758	chars). This is true for instance for
	759	filehandlers. `zope.publisher.browser.FileUpload` instances are
	760	_not_ iterable, unfortunately.
	761
	762	Returns line num of first illegal char or ``None``. Line nums
	763	start counting with 1 (not zero).
	764	"""
	765	linenum = 1
	766	reader = csv.DictReader(iterable)
	767	try:
	768	for row in reader:
	769	linenum += 1
	770	except UnicodeDecodeError:
	771	return linenum
	772	except:
	773	return linenum + 1
	774	return None

Note: See TracBrowser for help on using the repository browser.

Download in other formats: