Context navigation

helpers.py @ 16536

Last change on this file since 16536 was 15748, checked in by uli, 5 years ago

Bad workaround to cope with overcrowded catalogs.

Zope does not cope well with catalogs of more than 500,000 elements.
Reindexing such a catalog means loading all objects into memory before
saving them - this can exhaust the available memory.

We therefore use a dirty hack to replace the updateIndex method of
a catalog with a more careful function, that, however, should not be
run in production mode. No new items should be added during the run.

In the long run, we certainly need something more sustainable.

Property svn:keywords set to Id

File size: 29.8 KB

Rev	Line
[7196]	1	## $Id: helpers.py 15748 2019-11-04 10:15:44Z uli $
	2	##
	3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
[7819]	18	"""General helper functions for Kofa.
[4188]	19	"""
[10677]	20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
[8185]	21	import datetime
[8466]	22	import imghdr
[9593]	23	import logging
[4188]	24	import os
[8185]	25	import pytz
[4375]	26	import re
[4188]	27	import shutil
[8631]	28	import tempfile
[15748]	29	import transaction
[5731]	30	import grok
[5848]	31	from cStringIO import StringIO
	32	from docutils.core import publish_string
[15595]	33	from HTMLParser import HTMLParser
[7943]	34	from zope.component import getUtility
[5731]	35	from zope.component.interfaces import IFactory
[5734]	36	from zope.interface import implementedBy
[7941]	37	from zope.interface.interface import Method, Attribute
[15739]	38	from zope.intid.interfaces import IIntIds
[6071]	39	from zope.schema import getFieldNames
	40	from zope.schema.fieldproperty import FieldProperty
[6372]	41	from zope.security.interfaces import NoInteraction
	42	from zope.security.management import getInteraction
[7175]	43	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
[12231]	44	from zope.formlib.widget import renderElement
[4188]	45
[15739]	46
#: Number of bytes read per chunk when comparing files (see `cmp_files`).
BUFSIZE = 8 * 1024
[6372]	48
[10677]	49
def remove_file_or_directory(filepath):
    """Remove a file, symbolic link or directory tree.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, the link target is never
    touched. This includes dangling links and links pointing to
    directories.

    `filepath` is the path of the filesystem object to remove.

    Returns ``None``.
    """
    filepath = os.path.abspath(filepath)
    if os.path.islink(filepath):
        # `os.path.exists` is false for dangling links and
        # `shutil.rmtree` refuses to operate on links, therefore links
        # must be handled before any other check.
        os.unlink(filepath)
    elif os.path.isdir(filepath):
        shutil.rmtree(filepath)
    elif os.path.exists(filepath):
        os.unlink(filepath)
    return
	65
[10677]	66
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy contents of directory `src` to directory `dst`.

    Both directories must exist. A `ValueError` is raised if one of
    them does not exist or is not a directory.

    If `overwrite` is true, any same named objects in `dst` will be
    removed and replaced. Otherwise these objects will not be touched.

    If `del_old` is true, copied files and directories will be removed
    from the `src` directory.

    This function returns a list of names of non-copied items.

    Unix hidden files and directories (starting with '.') are not
    processed by this function.
    """
    for label, path in (('source', src), ('destination', dst)):
        if not os.path.exists(path):
            raise ValueError('%s path does not exist: %s' % (label, path))
    for label, path in (('source', src), ('destination', dst)):
        if not os.path.isdir(path):
            raise ValueError(
                '%s path is not a directory: %s' % (label, path))
    not_copied = []
    for item in os.listdir(src):
        if item.startswith('.'):
            continue  # We do not copy hidden stuff...
        itemsrc = os.path.join(src, item)
        itemdst = os.path.join(dst, item)

        if os.path.exists(itemdst):
            # Any truthy `overwrite` counts, not only the ``True``
            # singleton.
            if not overwrite:
                not_copied.append(item)
                continue
            remove_file_or_directory(itemdst)

        if os.path.isdir(itemsrc):
            shutil.copytree(itemsrc, itemdst)
        else:
            shutil.copy2(itemsrc, itemdst)
        if del_old:
            remove_file_or_directory(itemsrc)
    return not_copied
[4375]	112
	113
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this.

    If there is no form part, try to return the body part contents.

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    """
    # A form is returned including its tags. The pattern requires some
    # text before and after the form, i.e. a surrounding document.
    form = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form is not None:
        return form.group(1)
    # No <form> part included: deliver the contents of the body,
    # without the body tags themselves.
    body = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body is not None:
        return body.group(1)
    # No <form> and no <body> tag...
    return html_code
	158
[10677]	159
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors look up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...     # Some class we want to get instances of.
       ...     pass
       >>> class MyObjectFactory(FactoryBase):
       ...     # This is the factory for MyObject instances
       ...     grok.name(u'waeup.kofa.factory.MyObject')
       ...     factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...     'MyObjectFactory', MyObjectFactory
       ...  )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...     IFactory, name='waeup.kofa.factory.MyObject'
       ...   )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # NOTE: no trailing comma here - it would turn the title into a
    # one-element tuple instead of a text.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted, so that the
        factory can be called like in the examples above (without any
        argument), but they are not passed on to ``factory``.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory`` instances.
        """
        # Required by IFactory
        return implementedBy(self.factory)
[5848]	255
[10677]	256
def ReST2HTML_w_warnings(source_string):
    """Render reStructuredText as HTML and collect warnings separately.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (as unicode),
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None`` if there were none.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()
    # First pass: report everything into our own stream.
    overrides = {
        'report_level': 0,
        'warning_stream': stream,
        }
    fulldoc = publish_string(
        source_string, writer_name='html4css1',
        settings_overrides=overrides)
    stream.seek(0)
    warning_msgs = stream.read() or None
    if warning_msgs is not None:
        # Render again, this time with no warnings inline...
        overrides = {
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            }
        fulldoc = publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)
    html = get_inner_HTML_part(fulldoc).strip()
    if isinstance(html, unicode):
        return html, warning_msgs
    return html.decode('utf-8'), warning_msgs
[5848]	347
[10677]	348
def ReST2HTML(source_string):
    """Turn a ReStructuredText string into HTML, dropping warnings.

    Warnings about too short headings, etc. are not delivered. Use
    :func:`ReST2HTML_w_warnings` if you are interested in them.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML part of the (html, warnings) tuple is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
[6071]	394
[10677]	395
def attrs_to_fields(cls, omit=()):
    """Set class attributes and bind them to the data definitions
    specified in the interface by turning the attributes into FieldProperty
    instances.

    Only the first interface delivered by ``implementedBy(cls)`` is
    considered.

    With Python >= 2.6 we can even use this function as a class decorator.

    `omit` is a sequence of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.

    Returns the (modified) class `cls`.
    """
    iface = list(implementedBy(cls))[0]
    for field_name in getFieldNames(iface):
        if field_name in omit:
            continue
        field = iface[field_name]
        field_property = FieldProperty(field)
        # Set proper docstring for the API docs.
        field_property.__doc__ = field.title + ' (computed attribute)'
        setattr(cls, field_name, field_property)
    return cls
[6372]	415
[10677]	416
def get_current_principal():
    """Return the principal of the current interaction, if any.

    This works without a request at hand. Looking at the request is
    the regular (and recommended) way to find the principal involved
    'currently'; use this function only if you really have no access
    to the current request.

    Returns ``None`` when no principal is involved (for instance
    during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # Either there is no interaction at all or the interaction
        # has no participations.
        return None
[6503]	437
[10677]	438
def cmp_files(file_descr1, file_descr2):
    """Tell whether two open files have the same contents.

    Both file-like objects are rewound and then read in chunks of
    `BUFSIZE`.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for stream in (file_descr1, file_descr2):
        stream.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both files ended at the same time without a difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
[7078]	453
[10677]	454
def string_from_bytes(number):
    """Turn a number of bytes into some textual representation.

    Values below 1024 are given in bytes, values below 1024**2 in
    (whole) KB, values below 1024**3 in MB, and everything else in
    GB. MB and GB values are given with two decimal places, also if
    `number` is an integer.

    Examples:

        >>> string_from_bytes(1)
        u'1 byte(s)'

        >>> string_from_bytes(1025)
        u'1 KB'

        >>> string_from_bytes(1.5 * 1024*1024)
        u'1.50 MB'

        >>> string_from_bytes(1536 * 1024)
        u'1.50 MB'

        >>> string_from_bytes(673.286 * 1024**3)
        u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Floor division delivers whole KB for ints and floats alike.
        return u'%d KB' % (number // 1024,)
    # Divide by floats: plain `/` truncates integers under Python 2.
    if number < 1024 ** 3:
        return u'%.2f MB' % (number / 1024.0 ** 2,)
    return u'%.2f GB' % (number / 1024.0 ** 3,)
[7079]	480
[10677]	481
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Example:

        >>> from cStringIO import StringIO
        >>> file_size(StringIO('my file content'))
        15

    Real files are asked for their size via their file descriptor. For
    anything else we seek to the end and ask for the position.

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        fileno = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # No usable file descriptor. Some in-memory files (like
        # `io.BytesIO`) provide a `fileno` method that only raises.
        file_like_obj.seek(0, os.SEEK_END)  # seek to last position in file
        return file_like_obj.tell()
    return os.fstat(fileno).st_size
[7175]	503
[10677]	504
def get_user_account(request):
    """Look up the local user account of the request's principal.

    The account is fetched from the authenticator plugin registered
    under the name ``'users'``, using the id of ``request.principal``.
    """
    principal_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(principal_id)
[7941]	512
[10677]	513
def iface_names(iface, omit=(), exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    Searches also base interfaces.

    Names of fields that are pure attributes
    (i.e. zope.interface.Attribute) or methods are excluded by
    default.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a sequence of names to exclude.

    Returns an unsorted list of strings. Each name appears only once.
    """
    ifaces = set((iface,))
    # Collect all interfaces (also bases) until a round brings up no
    # new ones.
    while True:
        ext_ifaces = set(ifaces)
        for known in ifaces:
            ext_ifaces.update(known.getBases())
        if ext_ifaces == ifaces:
            # No new interfaces found, list complete
            break
        ifaces = ext_ifaces
    # Collect (filtered) names of collected interfaces
    result = []
    seen = set()  # names already in `result`, for fast lookups
    for known in ifaces:
        for name, descr in known.namesAndDescriptions():
            if name in seen or name in omit:
                continue
            # Exact class check on purpose: only descriptions being
            # plain `Attribute` instances are skipped here, not
            # instances of derived classes.
            if exclude_attribs and descr.__class__ is Attribute:
                continue
            if exclude_methods and isinstance(descr, Method):
                continue
            seen.add(name)
            result.append(name)
    return result
[7968]	553
[10677]	554
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    simply ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['z', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    preferred = list(preferred_list)
    # One slot per preferred value; slots of values not found stay
    # empty and are dropped at the end.
    head = [None] * len(preferred)
    rest = []
    for title, code in tuples_iterable:
        try:
            slot = preferred.index(code)
        except ValueError:
            rest.append((title, code))
        else:
            head[slot] = (title, code)
    return tuple([entry for entry in head if entry is not None] + rest)
[8185]	596
[10677]	597
def now(tz=None):
    """Return the current datetime, expressed in timezone `tz`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
	606
[10677]	607
def to_timezone(dt, tz=None):
    """Convert datetime `dt` into timezone `tz`.

    A 'naive' datetime (one without `tzinfo`) is taken as UTC.

    Without a `tz`, the value is converted to UTC.

    Values that are not `datetime.datetime` instances are handed back
    unchanged.

    As ``normalize()`` is called on the timezones involved, they are
    expected to be pytz timezones.
    """
    if not isinstance(dt, datetime.datetime):
        return dt
    target = pytz.utc if tz is None else tz
    aware = pytz.utc.localize(dt) if dt.tzinfo is None else dt
    normalized = aware.tzinfo.normalize(aware)
    return target.normalize(normalized.astimezone(target))
[8466]	626
[10677]	627
def imghdr_test_fpm(h, f):
    """FPM fileformat test.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`.

    `h` is the header data of the file to examine, `f` the respective
    file object (unused here). This is the signature of test functions
    stored in `imghdr.tests`.

    Returns ``'fpm'`` if the header starts with the magic word
    ``FP1``, ``None`` otherwise.
    """
    # A bytes literal compares correctly with binary header data under
    # Python 2 and Python 3. Slicing copes with too short headers.
    if h[:3] == b'FP1':
        return 'fpm'
    return None
	636
	637
#: Add test function in stdlib's imghdr tests.
#: This is a module-level side effect: it happens on import and makes
#: `imghdr.what` aware of the `fpm` format.
imghdr.tests.append(imghdr_test_fpm)
	640
	641
def get_fileformat(path, bytestream=None):
    """Guess the file format of a media file from its header data.

    The checks done here are not very thorough, but they do not rely
    on filename extensions or similar. Instead the header data is
    examined for commonly known markers (Magic Words).

    If bytestream is not `None` the `path` is ignored.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else. The formats
    ``jpeg`` and ``tiff`` are reported as ``'jpg'`` and ``'tif'``.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    return short_names.get(detected, detected)
	674
[10677]	675
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    Works as a test/plugin for the stdlib `imghdr` library.

    If `file` is not ``None``, `bytestream` is ignored. Instead the
    first four bytes of `file` are examined and the file is rewound to
    its start afterwards.

    Returns ``'pdf'`` if the data starts with the PDF magic word,
    ``None`` otherwise.
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    # A bytes literal is required to compare with binary data under
    # Python 3; under Python 2 it is a regular string.
    if bytestream.startswith(b'%PDF'):
        return 'pdf'
    return None
	689
# register check_pdf as header check function with `imghdr`
# (module-level side effect on import; the membership test avoids
# adding the very same function object twice)
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
[8631]	693
[10677]	694
def _sniff_csv_fieldnames(path):
    """Get the set of column names of the first data row in CSV file
    `path`. Files without any data row deliver an empty set.
    """
    with open(path, 'rb') as fd:
        try:
            return set(next(csv.DictReader(fd)))
        except StopIteration:
            return set()


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`. The columns of the result file are the (sorted)
    columns of both input files.

    Caution: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.

    All files opened here are closed before returning, so the result
    file is completely written when its path is delivered.
    """
    # sniff the col names
    fieldnames = sorted(
        _sniff_csv_fieldnames(path1) | _sniff_csv_fieldnames(path2))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    with os.fdopen(wp, 'wb') as out:
        writer = csv.DictWriter(out, fieldnames)
        writer.writerow(dict((x, x) for x in fieldnames))  # header
        for path in (path1, path2):
            with open(path, 'rb') as src:
                for row in csv.DictReader(src):
                    writer.writerow(row)
    return tmp_path
[9372]	735
[10677]	736
def product(sequence, start=1):
    """Multiply all numbers (_not_ strings) of `sequence`.

    The multiplication starts with the value of `start` (1 by
    default). For an empty sequence 0 is returned.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
[9593]	748
[10677]	749
class NullHandler(logging.Handler):
    """A logging handler that swallows every record.

    Nothing is logged by this handler. It is handy to silence a
    logger.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Ignore `record`."""
        return None
[10676]	759
	760
	761	def check_csv_charset(iterable):
[14939]	762	"""Check contents of `iterable` regarding valid CSV encoding and
	763	trailing whitespaces in data.
[10676]	764
	765	`iterable` is expected to be an iterable on _rows_ (not
	766	chars). This is true for instance for
	767	filehandlers. `zope.publisher.browser.FileUpload` instances are
	768	_not_ iterable, unfortunately.
	769
	770	Returns line num of first illegal char or ``None``. Line nums
[14939]	771	start counting with 1 (not zero). Returns -1 if data contain
	772	trailing whitespaces.
[10676]	773	"""
	774	linenum = 1
	775	try:
[13537]	776	reader = csv.DictReader(iterable)
[10676]	777	for row in reader:
	778	linenum += 1
[14939]	779	for value in row.values():
	780	if value.endswith(' '):
	781	return -1
[10676]	782	except UnicodeDecodeError:
	783	return linenum
	784	except:
	785	return linenum + 1
	786	return None
[11824]	787
	788
	789	class MemInfo(dict):
	790	"""A dict with access to its items like if they are attributes.
	791	"""
	792	__getattr__ = dict.__getitem__
	793	__setattr__ = dict.__setitem__
	794	__delattr__ = dict.__delitem__
	795
	796
	797	def get_meminfo(src="/proc/meminfo"):
	798	"""Get local memory info as provided in /proc/meminfo.
	799
	800	Entries in /proc/meminfo are available as MemInfo attributes.
	801
	802	By default we lookup a file /proc/meminfo. Another path can be
	803	lines = open(src, 'r').read()passed in as `src` parameter. In this
	804	case `src` must be a regular file and contain meminfo-style data.
	805
	806	If the given `src` (or `/proc/meminfo`) are not available, `None`
	807	lines = open(src, 'r').read()is returned.
	808	"""
	809	if not os.path.isfile(src):
	810	return None
	811	lines = open(src, 'r').read().splitlines()
	812	result = MemInfo()
	813	for line in lines:
	814	key, value = line.split(':', 1)
	815	value = int(value.split(' kB', 1)[0])
	816	result[key] = value
	817	return result
[12231]	818
	819	def html2dict(value=None,portal_language='en'):
	820	"""Transforms a localized HTML text string into a dictionary.
	821
[13077]	822	Different languages must be separated by ``>>xy<<`` whereas
[12231]	823	xy is the language code. Text parts without correct leading
	824	language separator - usually the first part has no language
	825	descriptor - are interpreted as texts in the portal's language.
	826	"""
	827	try:
	828	parts = value.split('>>')
	829	except:
	830	return {}
	831	elements = {}
	832	lang = portal_language
	833	for part in parts:
	834	if part[2:4] == u'<<':
[12393]	835	lang = str(part[0:2].lower())
[12231]	836	text = part[4:]
	837	elements[lang] = renderElement(u'div id="html"',
	838	contents=text)
	839	else:
	840	text = part
	841	elements[lang] = renderElement(u'div id="html"',
	842	contents=text)
[12433]	843	return elements
	844
	845	def rest2dict(value=None,portal_language='en'):
	846	"""Transforms a localized REST text string into a dictionary.
	847
[13077]	848	Different languages must be separated by ``>>xy<<``` whereas
[12433]	849	xy is the language code. Text parts without correct leading
	850	language separator - usually the first part has no language
	851	descriptor - are interpreted as texts in the portal's language.
	852	"""
	853	try:
	854	parts = value.split('>>')
	855	except:
	856	return {}
	857	elements = {}
	858	lang = portal_language
	859	for part in parts:
	860	if part[2:4] == u'<<':
	861	lang = str(part[0:2].lower())
	862	text = part[4:]
	863	elements[lang] = renderElement(u'div id="rest"',
	864	contents=ReST2HTML(text))
	865	else:
	866	text = part
	867	elements[lang] = renderElement(u'div id="rest"',
	868	contents=ReST2HTML(text))
[15595]	869	return elements
	870
	871
	872
	873	class FormVarParser(HTMLParser):
	874	"""An HTML form parser that extracts keys and values.
	875
	876	Fed with an HTML document, we parse all starttags and check for each,
	877	whether it provides a `name` and a `value` attribute. If so, the
	878	values of the respective attributes are stored in instance var
	879	`form_vars` as a dict entry.
	880	"""
	881
	882	def __init__(self):
	883	HTMLParser.__init__(self) # old-style class - no super()
	884	self.form_vars = {}
	885
	886	def handle_starttag(self, tag, attrs):
	887	tag_attrs = {}
	888	for key, val in attrs:
	889	tag_attrs[key] = val
	890	if 'name' in tag_attrs and 'value' in tag_attrs:
[15597]	891	self.form_vars[tag_attrs['name']] = unicode(tag_attrs['value'])
[15595]	892
	893
	894	def extract_formvars(html_code):
	895	"""Extract keys and values from an HTML form as dict.
	896
	897	No text, no values::
	898
	899	>>> extract_formvars("")
	900	{}
	901
	902	Simple input tags normally provide name and value::
	903
	904	>>> extract_formvars("<input type='text' name='foo' value='bar'>")
[15627]	905	{'foo': u'bar'}
[15595]	906
	907	The sample doc we stored in tests is a bit more difficult::
	908
	909	>>> html_path = os.path.join(os.path.dirname(__file__),
	910	... 'tests', 'sample_response.html')
	911	>>> html_code = open(html_path, 'r').read()
	912	>>> import pprint
	913	>>> pprint.pprint(extract_formvars(html_code))
[15627]	914	{'AMOUNT': u'100',
[15595]	915	...
[15627]	916	'TRANS_NUM': u'01ESA20190916134824YA3YJ8'}
[15595]	917
	918	"""
	919	result = {}
	920	parser = FormVarParser()
	921	parser.feed(html_code)
	922	return parser.form_vars
[15739]	923
	924
	925	def get_catalog_docids(cat):
	926	"""Get all docids for a given catalog `cat`.
	927
	928	Catalogs store the ids of objects they index. Get all of these object ids.
	929	This function works at least for catalogs that provide field- and text
	930	indexes only.
	931	"""
	932	result = []
	933	for index in cat.values():
	934	try:
	935	# FieldIndexes
	936	result.extend(list(index._rev_index.keys()))
	937	except AttributeError:
	938	# TextIndexes
	939	result.extend(list(index.index._docwords.keys()))
	940	return set(result)
	941
	942
	943	def reindex_cat(cat):
	944	"""Reindex all objects stored in a catalog `cat`.
	945
	946	Regular catalogs try to reindex all stored object ids of a ZODB when asked
	947	to reindex all contents. That can be overkill. This function reindexes only
	948	those objects, that were already stored in a catalog. It was tested for
	949	catalogs with at least 650000 objects.
	950
	951	Please note, that reindexing catalgos, can take a considerable amount of
	952	time. 100.000 objects took about 12 minutes to reindex on a 16 GB machine.
	953	"""
	954	d1 = datetime.datetime.now()
	955	print("Collecting doc ids...")
	956	uidutil = getUtility(IIntIds, context=cat)
	957	uids = get_catalog_docids(cat)
	958	print("Found %s entries..." % len(uids))
[15748]	959	for n, docid in enumerate(uids):
[15739]	960	ob = uidutil.getObject(docid)
	961	cat.index_doc(docid, ob)
[15748]	962	# indexes can become huge. commit changes every 5000th round to
	963	# keep the memory footprint of catalogs `updateIndex` manageable
	964	if not n % 5000:
	965	transaction.commit()
[15739]	966	d2 = datetime.datetime.now()
	967	print("Finished. %s" % (d2 - d1))

Note: See TracBrowser for help on using the repository browser.

Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 16536

Download in other formats: