Context navigation

source: main/waeup.ikoba/trunk/src/waeup/ikoba/utils/helpers.py @ 12278

Last change on this file since 12278 was 12235, checked in by Henrik Bettermann, 10 years ago
html_dict must initially be a dict.
Property svn:keywords set to `Id`
File size: 25.4 KB

Rev	Line
[7196]	1	## $Id: helpers.py 12235 2014-12-14 22:15:37Z henrik $
	2	##
	3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
[11949]	18	"""General helper functions for Ikoba.
[4188]	19	"""
[10677]	20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
[8185]	21	import datetime
[8466]	22	import imghdr
[9593]	23	import logging
[4188]	24	import os
[8185]	25	import pytz
[4375]	26	import re
[4188]	27	import shutil
[8631]	28	import tempfile
[5731]	29	import grok
[5848]	30	from cStringIO import StringIO
	31	from docutils.core import publish_string
[7943]	32	from zope.component import getUtility
[5731]	33	from zope.component.interfaces import IFactory
[5734]	34	from zope.interface import implementedBy
[7941]	35	from zope.interface.interface import Method, Attribute
[6071]	36	from zope.schema import getFieldNames
	37	from zope.schema.fieldproperty import FieldProperty
[6372]	38	from zope.security.interfaces import NoInteraction
	39	from zope.security.management import getInteraction
[7175]	40	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
[12229]	41	from zope.formlib.widget import renderElement
[4188]	42
#: Number of bytes read per chunk when comparing files (see `cmp_files`).
BUFSIZE = 8 * 1024
[6372]	44
[10677]	45
def remove_file_or_directory(filepath):
    """Delete `filepath`, be it a regular file or a directory tree.

    In contrast to :func:`shutil.rmtree` paths that do not exist are
    silently ignored and plain files are removed as well.
    """
    target = os.path.abspath(filepath)
    if os.path.isdir(target):
        # Directories are removed with all their contents.
        shutil.rmtree(target)
    elif os.path.exists(target):
        os.unlink(target)
    return
	61
[10677]	62
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the contents of directory `src` into directory `dst`.

    Both directories must exist, otherwise a `ValueError` is raised.

    Items already present in `dst` are only replaced if `overwrite`
    is ``True``; otherwise they are left untouched and their names
    are collected in the returned list.

    If `del_old` is true, every successfully copied item is removed
    from `src` afterwards.

    Unix hidden files and directories (names starting with ``'.'``)
    are not processed at all.

    Returns the list of names of items that were not copied.
    """
    preconditions = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
        )
    for check, path, message in preconditions:
        if not check(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            # Hidden stuff is never copied.
            continue
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)
        if del_old:
            remove_file_or_directory(source_item)
    return skipped
[4375]	108
	109
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # The form (including its tags) must be preceded and followed by
    # at least one character, i.e. it has to be embedded in some
    # surrounding markup.
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included: look for the contents of a <body> tag.
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
	154
[10677]	155
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.ikoba` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.ikoba.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...   # Some class we want to get instances of.
       ...   pass
       >>> class MyObjectFactory(FactoryBase):
       ...   # This is the factory for MyObject instances
       ...   grok.name(u'waeup.ikoba.factory.MyObject')
       ...   factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.ikoba.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.ikoba`
    system.

       >>> grok.testing.grok('waeup.ikoba.utils.helpers')
       >>> grok.testing.grok_component(
       ...    'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.ikoba.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...   IFactory, name='waeup.ikoba.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # A plain string; a trailing comma here would turn it into a tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new e.g. applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Any positional or keyword arguments are accepted (so the
        factory can be called without arguments as well) but they are
        not passed on: ``self.factory`` is always called without
        arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``self.factory``.

        Required by IFactory.
        """
        return implementedBy(self.factory)
[5848]	251
[10677]	252
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None``.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.ikoba.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    warning_stream = StringIO()
    # First pass: report everything into our own stream.
    overrides = {
        'report_level': 0,
        'warning_stream': warning_stream,
        }
    document = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=overrides)
    warning_stream.seek(0)
    collected = warning_stream.read()
    if collected:
        # Render again, this time with no warnings inline...
        overrides = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': warning_stream,
            }
        document = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)
    html = get_inner_HTML_part(document).strip()
    if not isinstance(html, unicode):
        html = html.decode('utf-8')
    # An empty warnings string is reported as `None`.
    return html, (collected or None)
[5848]	343
[10677]	344
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    Warnings about too short headings and the like are silently
    dropped. Use :func:`ReST2HTML_w_warnings` if you need them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the ``(html, warnings)`` pair is of interest.
    rendered = ReST2HTML_w_warnings(source_string)
    return rendered[0]
[6071]	390
[10677]	391
def attrs_to_fields(cls, omit=[]):
    """Replace schema attributes of `cls` by FieldProperty instances.

    The schema fields are taken from the first interface implemented
    by `cls`. With Python >= 2.6 this function can also be used as a
    class decorator.

    `omit` is a list of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.

    Returns `cls`.
    """
    iface = list(implementedBy(cls))[0]
    wanted = (name for name in getFieldNames(iface) if name not in omit)
    for name in wanted:
        prop = FieldProperty(iface[name])
        # Set proper docstring for the API docs.
        prop.__doc__ = iface[name].title + ' (computed attribute)'
        setattr(cls, name, prop)
    return cls
[6372]	409
[10677]	410
def get_current_principal():
    """Get the 'current' principal without having a request at hand.

    Examining a request is the regular (and recommended) way to get
    the principal involved 'currently'. Use this function only if you
    really have no access to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests), i.e. when there is no interaction or when the
    interaction has no participations.
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present.
        return None
[6503]	431
[10677]	432
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have identical contents.

    Both files are rewound and then read in chunks of `BUFSIZE`.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for descr in (file_descr1, file_descr2):
        descr.seek(0)
    while True:
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
        if chunk1 != chunk2:
            return False
        if not chunk1:
            # Both files are exhausted and no difference was found.
            return True
[7078]	447
[10677]	448
def string_from_bytes(number):
    """Turn a number into some textual representation.

    Values below 1024 are given as bytes, values below 1024**2 as
    whole kilobytes (rounded down). Larger values are given as MB or
    GB with two decimal places.

    Examples:

      >>> string_from_bytes(1)
      u'1 byte(s)'

      >>> string_from_bytes(1025)
      u'1 KB'

      >>> string_from_bytes(1.5 * 1024*1024)
      u'1.50 MB'

      >>> string_from_bytes(1572864)
      u'1.50 MB'

      >>> string_from_bytes(673.286 * 1024**3)
      u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Floor division gives whole KB for ints and floats alike.
        return u'%d KB' % (number // 1024,)
    # Convert to float first: integer byte counts must not be
    # truncated by integer division.
    size = float(number)
    if number < 1024 ** 3:
        return u'%.2f MB' % (size / 1024 ** 2,)
    return u'%.2f GB' % (size / 1024 ** 3,)
[7079]	474
[10677]	475
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        fileno = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # Either there is no `fileno()` at all or the object (like
        # `io.BytesIO`) has one that refuses to deliver a real file
        # descriptor. Fall back to seeking below.
        pass
    else:
        return os.fstat(fileno).st_size
    file_like_obj.seek(0, os.SEEK_END)  # seek to last position in file
    return file_like_obj.tell()
[7175]	497
[10677]	498
def get_user_account(request):
    """Return the local user account of the principal of `request`.

    The account is looked up by principal id in the authenticator
    plugin registered under the name ``'users'``.
    """
    principal_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(principal_id)
[7941]	506
[10677]	507
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Base interfaces are searched as well.

    Names of pure attributes (i.e. objects whose class is exactly
    `zope.interface.Attribute`) and of methods are excluded by
    default; see `exclude_attribs` and `exclude_methods`.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings without duplicates.
    """
    # Collect all interfaces (also bases) until no new ones show up.
    known = set((iface,))
    while True:
        grown = set(known)
        for candidate in known:
            grown.update(candidate.getBases())
        if grown == known:
            # No new interfaces found, collection complete.
            break
        known = grown
    # Collect (filtered) names of collected interfaces.
    names = []
    for candidate in known:
        for name, descr in candidate.namesAndDescriptions():
            skip = (
                name in omit
                or (exclude_attribs and descr.__class__ is Attribute)
                or (exclude_methods and isinstance(descr, Method))
                or name in names
                )
            if not skip:
                names.append(name)
    return names
[7968]	547
[10677]	548
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred tokens that do not occur in the input are ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['x', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    # One slot per preferred token, in order of preference.
    preferred = [None] * len(preferred_list)
    remaining = []
    for title, code in tuples_iterable:
        if code in preferred_list:
            preferred[preferred_list.index(code)] = (title, code)
        else:
            remaining.append((title, code))
    # Slots of preferred tokens not found in the input stay `None`
    # and must not show up in the result.
    found = [entry for entry in preferred if entry is not None]
    return tuple(found + remaining)
[8185]	590
[10677]	591
def now(tz=None):
    """Get the current datetime in timezone `tz`.

    `tz` should be a timezone as defined in pytz. If it is ``None``,
    the current time is returned in UTC.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
	600
[10677]	601
def to_timezone(dt, tz=None):
    """Shift datetime `dt` into timezone `tz`.

    A `dt` without `tzinfo` (i.e. a 'naive' datetime) is assumed to
    be UTC.

    If no `tz` is given, `dt` is shifted to UTC.

    Values that are not `datetime.datetime` instances are returned
    unchanged.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    # Normalize in the source timezone first, then in the target one.
    normalized = aware.tzinfo.normalize(aware)
    return target.normalize(normalized.astimezone(target))
[8466]	620
[10677]	621
def imghdr_test_fpm(h, f):
    """FPM fileformat test for the stdlib `imghdr` library.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`. Header data `h` starting with ``FP1`` is recognized
    as such; `f` is not examined.

    Returns ``'fpm'`` or ``None``.
    """
    if len(h) < 3:
        return None
    if h[:3] == 'FP1':
        return 'fpm'
    return None
	630
	631
	632	#: Add test function in stdlib's imghdr tests.
	633	imghdr.tests.append(imghdr_test_fpm)
	634
	635
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    The checks done here are not very thorough, but they do not rely
    on filename extensions or similar. Instead header data is checked
    against commonly known rules (Magic Words).

    If `bytestream` is not `None` it is passed to `imghdr` as header
    data together with `path`.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.ikoba) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # We prefer the short names of some formats.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
	668
[10677]	669
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    If `file` is given, its first four bytes are examined instead of
    `bytestream`; the file is rewound before and after reading.

    Works as a test/plugin for the stdlib `imghdr` library.

    Returns ``'pdf'`` or ``None``.
    """
    header = bytestream
    if file is not None:
        file.seek(0)
        header = file.read(4)
        file.seek(0)

    if not header.startswith('%PDF'):
        return None
    return 'pdf'
	683
# Register check_pdf as header check function with `imghdr`. The
# membership test makes sure the function is added only once.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
[8631]	687
[10677]	688
def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`.

    Caution: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.
    If merging fails, the temporary file is removed before the
    exception is passed on.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.
    """
    # Sniff the col names from the first data row of each file.
    first_rows = []
    for path in (path1, path2):
        with open(path, 'rb') as source:
            try:
                first_rows.append(next(csv.DictReader(source)))
            except StopIteration:
                # No data rows: this file contributes no columns.
                first_rows.append(dict())
    fieldnames = sorted(set(first_rows[0]) | set(first_rows[1]))
    # Now read/write the real data.
    wp, tmp_path = tempfile.mkstemp()
    try:
        with os.fdopen(wp, 'wb') as target:
            writer = csv.DictWriter(target, fieldnames)
            writer.writerow(dict((x, x) for x in fieldnames))  # header
            for path in (path1, path2):
                with open(path, 'rb') as source:
                    for row in csv.DictReader(source):
                        writer.writerow(row)
    except Exception:
        # The caller never learns about the temp file; do not leave
        # it behind.
        os.unlink(tmp_path)
        raise
    return tmp_path
[9372]	729
[10677]	730
def product(sequence, start=1):
    """Multiply all numbers (_not_ strings) in `sequence`.

    The result is additionally multiplied by `start` (1 by default).
    For an empty sequence 0 is returned, regardless of `start`.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
[9593]	742
[10677]	743
class NullHandler(logging.Handler):
    """A logging handler that drops every record.

    Useful if you want to shut up a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Ignore `record`."""
        return None
[10676]	753
	754
	755	def check_csv_charset(iterable):
	756	"""Check contents of `iterable` regarding valid CSV encoding.
	757
	758	`iterable` is expected to be an iterable on _rows_ (not
	759	chars). This is true for instance for
	760	filehandlers. `zope.publisher.browser.FileUpload` instances are
	761	_not_ iterable, unfortunately.
	762
	763	Returns line num of first illegal char or ``None``. Line nums
	764	start counting with 1 (not zero).
	765	"""
	766	linenum = 1
	767	reader = csv.DictReader(iterable)
	768	try:
	769	for row in reader:
	770	linenum += 1
	771	except UnicodeDecodeError:
	772	return linenum
	773	except:
	774	return linenum + 1
	775	return None
[11824]	776
	777
	778	class MemInfo(dict):
	779	"""A dict with access to its items like if they are attributes.
	780	"""
	781	__getattr__ = dict.__getitem__
	782	__setattr__ = dict.__setitem__
	783	__delattr__ = dict.__delitem__
	784
	785
	786	def get_meminfo(src="/proc/meminfo"):
	787	"""Get local memory info as provided in /proc/meminfo.
	788
	789	Entries in /proc/meminfo are available as MemInfo attributes.
	790
	791	By default we lookup a file /proc/meminfo. Another path can be
	792	lines = open(src, 'r').read()passed in as `src` parameter. In this
	793	case `src` must be a regular file and contain meminfo-style data.
	794
	795	If the given `src` (or `/proc/meminfo`) are not available, `None`
	796	lines = open(src, 'r').read()is returned.
	797	"""
	798	if not os.path.isfile(src):
	799	return None
	800	lines = open(src, 'r').read().splitlines()
	801	result = MemInfo()
	802	for line in lines:
	803	key, value = line.split(':', 1)
	804	value = int(value.split(' kB', 1)[0])
	805	result[key] = value
	806	return result
[12229]	807
	808	def html2dict(value=None,portal_language='en'):
	809	"""Transforms a localized HTML text string into a dictionary.
	810
	811	Different languages must be separated by `>>xy<<` whereas
	812	xy is the language code. Text parts without correct leading
	813	language separator - usually the first part has no language
	814	descriptor - are interpreted as texts in the portal's language.
	815	The latter can be configured in waeup.srp.utils.utils.IkobaUtils.
	816	"""
	817	try:
	818	parts = value.split('>>')
	819	except:
	820	return {}
	821	elements = {}
	822	lang = portal_language
	823	for part in parts:
	824	if part[2:4] == u'<<':
	825	lang = part[0:2].lower()
	826	text = part[4:]
	827	elements[lang] = renderElement(u'div id="html"',
	828	contents=text)
	829	else:
	830	text = part
	831	elements[lang] = renderElement(u'div id="html"',
	832	contents=text)
	833	return elements

Note: See TracBrowser for help on using the repository browser.

Download in other formats: