Context navigation

helpers.py @ 16472

Last change on this file since 16472 was 15748, checked in by uli, 6 years ago

Bad workaround to cope with overcrowded catalogs.

Zope does not cope well with catalogs of more than 500,000 elements.
Reindexing such a catalog means loading all objects into memory before
saving them - this can exhaust the available memory.

We therefore use a dirty hack to replace the updateIndex method of
a catalog with a more careful function, that, however, should not be
run in production mode. No new items should be added during the run.

In the long run, we certainly need something more sustainable.

Property svn:keywords set to Id

File size: 29.8 KB

Line
1	## $Id: helpers.py 15748 2019-11-04 10:15:44Z uli $
2	##
3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""General helper functions for Kofa.
19	"""
20	import unicodecsv as csv # XXX: csv ops should move to dedicated module.
21	import datetime
22	import imghdr
23	import logging
24	import os
25	import pytz
26	import re
27	import shutil
28	import tempfile
29	import transaction
30	import grok
31	from cStringIO import StringIO
32	from docutils.core import publish_string
33	from HTMLParser import HTMLParser
34	from zope.component import getUtility
35	from zope.component.interfaces import IFactory
36	from zope.interface import implementedBy
37	from zope.interface.interface import Method, Attribute
38	from zope.intid.interfaces import IIntIds
39	from zope.schema import getFieldNames
40	from zope.schema.fieldproperty import FieldProperty
41	from zope.security.interfaces import NoInteraction
42	from zope.security.management import getInteraction
43	from zope.pluggableauth.interfaces import IAuthenticatorPlugin
44	from zope.formlib.widget import renderElement
45
46
47	BUFSIZE = 8 * 1024
48
49
def remove_file_or_directory(filepath):
    """Remove a file, a symbolic link or a directory tree.

    Different to :func:`shutil.rmtree` we also accept not existing
    paths (returning silently) and if a dir turns out to be a regular
    file, we remove that.

    Symbolic links are removed themselves, their targets are left
    alone. This includes dangling links and links pointing to
    directories.
    """
    filepath = os.path.abspath(filepath)
    # `lexists` is also true for dangling symlinks, which `exists`
    # reports as missing and which would therefore never be removed.
    if not os.path.lexists(filepath):
        return
    # A symlink to a directory passes `isdir`, but `rmtree` refuses to
    # work on symlinks; such links are unlinked like regular files.
    if os.path.isdir(filepath) and not os.path.islink(filepath):
        shutil.rmtree(filepath)
    else:
        os.unlink(filepath)
    return
65
66
def copy_filesystem_tree(src, dst, overwrite=False, del_old=False):
    """Copy the entries of directory `src` into directory `dst`.

    Both paths must exist and must be directories, otherwise a
    :exc:`ValueError` is raised.

    Entries that already exist in `dst` are replaced only if
    `overwrite` is ``True``. Otherwise they are left untouched and
    their names are collected.

    If `del_old` is true, each entry is removed from `src` after it
    was copied.

    Entries whose name starts with a dot (Unix hidden files and
    directories) are never processed.

    Returns the list of entry names that were not copied.
    """
    checks = (
        (os.path.exists, src, 'source path does not exist: %s'),
        (os.path.exists, dst, 'destination path does not exist: %s'),
        (os.path.isdir, src, 'source path is not a directory: %s'),
        (os.path.isdir, dst, 'destination path is not a directory: %s'),
    )
    for predicate, path, message in checks:
        if not predicate(path):
            raise ValueError(message % path)

    skipped = []
    for name in os.listdir(src):
        if name.startswith('.'):
            continue  # We do not copy hidden stuff...
        source_item = os.path.join(src, name)
        target_item = os.path.join(dst, name)

        if os.path.exists(target_item):
            if overwrite is not True:
                skipped.append(name)
                continue
            remove_file_or_directory(target_item)

        if os.path.isdir(source_item):
            copier = shutil.copytree
        else:
            copier = shutil.copy2
        copier(source_item, target_item)
        if del_old:
            remove_file_or_directory(source_item)
    return skipped
112
113
def get_inner_HTML_part(html_code):
    """Return the 'inner' part of a complete HTML snippet.

    If there is a form part, get this (including the ``<form>`` tags).

    If there is no form part, try to return the body part contents
    (without the ``<body>`` tags).

    If there is no body, return as-is.

    Let's see how that works. If we deliver some doc with form, we
    will get that form only:

       >>> doc = '<html><form>My Form</form>Outside the form</html>'
       >>> get_inner_HTML_part(doc)
       '<form>My Form</form>'

    No form? Then seek for a body part and get the contents:

       >>> doc = '<html><body>My Body</body>Trailing Trash</html>'
       >>> get_inner_HTML_part(doc)
       'My Body'

    If none of these is included, return what we got:

       >>> doc = '<html>without body nor form</html>'
       >>> get_inner_HTML_part(doc)
       '<html>without body nor form</html>'

    Please note that the form must be preceded and followed by at
    least one character and the body must be preceded by at least
    one character to be detected.
    """
    # The opening tags may carry attributes (``[^>]*``) and the wanted
    # parts may span several lines (``re.DOTALL``).
    form_match = re.match(
        r'^.+(<form[^>]*>.*</form>).+$', html_code, re.DOTALL)
    if form_match is not None:
        return form_match.group(1)
    # No <form> part included
    body_match = re.match(
        r'^.+<body[^>]*>(.*)</body>.*$', html_code, re.DOTALL)
    if body_match is not None:
        return body_match.group(1)
    # No <form> and no <body> tag...
    return html_code
158
159
class FactoryBase(grok.GlobalUtility):
    """A factory for things.

    This is a baseclass for easier creation of factories. Factories
    are utilities that are registered under a certain name and return
    instances of certain classes when called.

    In :mod:`waeup.kofa` we use factories extensively for
    batching. While processing a batch some processors looks up a
    factory to create real-world instances that then get filled with
    data from imported CSV files.

    To get rid of reimplementing the same stuff over and over again,
    most notably the methods defined here, we offer this base class
    (which will not be registered as a factory itself).

    Real factories can then be created like this:

       >>> import grok
       >>> from waeup.kofa.utils.helpers import FactoryBase
       >>> class MyObject(object):
       ...     # Some class we want to get instances of.
       ...     pass
       >>> class MyObjectFactory(FactoryBase):
       ...     # This is the factory for MyObject instances
       ...     grok.name(u'waeup.kofa.factory.MyObject')
       ...     factory = MyObject

    That's it. It is essential to set the ``factory`` attribute, which
    will determine the class of which instances should be created when
    called. The given name must even be unique amongst all utilities
    registered during runtime. While you can pick any name you like
    you might want to prepend ``waeup.kofa.factory.`` to the name
    string to make sure it does not clash with names of other
    utilities one day.

    Before all this works we have to grok the baseclass once and our
    freshly defined factory. This executes all the component
    registration stuff we don't want to do ourselves. In daily use
    this is done automatically on startup of a :mod:`waeup.kofa`
    system.

       >>> grok.testing.grok('waeup.kofa.utils.helpers')
       >>> grok.testing.grok_component(
       ...     'MyObjectFactory', MyObjectFactory
       ...     )
       True

    After grokking we (and processors) can create objects without
    knowing about the location of the real class definition, just by
    the factory name:

       >>> from zope.component import createObject
       >>> obj = createObject('waeup.kofa.factory.MyObject')
       >>> isinstance(obj, MyObject)
       True

    We can also use the regular utility lookups to find our new
    factory:

       >>> from zope.component import getUtility
       >>> from zope.component.interfaces import IFactory
       >>> factory = getUtility(
       ...     IFactory, name='waeup.kofa.factory.MyObject'
       ...     )
       >>> isinstance(factory, MyObjectFactory)
       True

    And this factory generates `MyObject` instances:

       >>> obj = factory()
       >>> isinstance(obj, MyObject)
       True

    """
    grok.baseclass()  # Do not grok this class, do not register us.
    grok.implements(IFactory)
    # You can override any of the following attributes in derived
    # classes. The `grok.name` setting must even be set to some
    # unique value.
    grok.name(u'waeup.Factory')
    # No trailing comma here: it would turn the title into a 1-tuple.
    title = u"Create instances of ``factory``."
    description = u"This factory instantiates new applicant instances."
    factory = None

    def __call__(self, *args, **kw):
        """The main factory function.

        Returns an instance of the requested object.

        Positional and keyword arguments are accepted, so that the
        factory can be called with or without arguments, but they are
        not passed on: ``factory`` is always called without arguments.
        """
        return self.factory()

    def getInterfaces(self):
        """Return the interfaces implemented by ``factory``."""
        # Required by IFactory
        return implementedBy(self.factory)
255
256
def ReST2HTML_w_warnings(source_string):
    """Convert a reStructuredText string to HTML preserving warnings.

    Returns a tuple ``(<HTML_CODE>, <WARNINGS>)``. ``<HTML_CODE>`` is
    the HTML code generated from the source string (in unicode),
    reduced to its inner part by :func:`get_inner_HTML_part`.
    ``<WARNINGS>`` is a string containing any warning messages or
    ``None``.

    Regular multi-line ReStructuredText strings will be returned as
    HTML code:

        >>> from waeup.kofa.utils.helpers import ReST2HTML_w_warnings
        >>> source = '''
        ... Headline
        ... ========
        ...
        ... - A list item
        ... - Another item
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <ul class="simple">
        <li>A list item</li>
        <li>Another item</li>
        </ul>
        <p>Thanks for watching!</p>
        </div>

    Here no warnings happened, so the `warnings` are ``None``:

        >>> warnings is None
        True

    If warnings happen then they can be retrieved in the returned
    ``warnings``. We try to render an erroneous document:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html, warnings = ReST2HTML_w_warnings(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

        >>> print warnings
        <string>:3: (WARNING/2) Title underline too short.
        <BLANKLINE>
        Headline
        ======
        <BLANKLINE>

    As you can see, the warnings are not displayed inline the document
    but can be retrieved from the returned warnings, which is a string
    or ``None``.
    """
    stream = StringIO()

    def render(overrides):
        # Both passes differ in their docutils settings only.
        return publish_string(
            source_string, writer_name='html4css1',
            settings_overrides=overrides)

    # First pass: collect everything docutils reports in `stream`.
    fulldoc = render({
        'report_level': 0,
        'warning_stream': stream,
        })
    stream.seek(0)
    warning_msgs = stream.read()
    if warning_msgs:
        # Render again, this time with no warnings inline...
        fulldoc = render({
            'report_level': 10000,
            'halt_level': 10000,
            'warning_stream': stream,
            })
    else:
        warning_msgs = None
    inner = get_inner_HTML_part(fulldoc).strip()
    if isinstance(inner, unicode):
        return inner, warning_msgs
    return inner.decode('utf-8'), warning_msgs
347
348
def ReST2HTML(source_string):
    """Render a string containing ReStructuredText to HTML.

    This is :func:`ReST2HTML_w_warnings` with the warnings part of
    its result dropped: any warnings about too short headings,
    etc. are silently discarded. Use :func:`ReST2HTML_w_warnings` if
    you want to get any warnings.

    The returned string will be unicode.

    A regular document will be rendered like this:

        >>> source = '''
        ... Headline
        ... ========
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    A document with markup problems (here: the underline is too short)
    will look similar:

        >>> source = '''
        ... Headline
        ... ======
        ...
        ... Thanks for watching!
        ... '''
        >>> html = ReST2HTML(source)
        >>> print html
        <div class="document" id="headline">
        <h1 class="title">Headline</h1>
        <BLANKLINE>
        <p>Thanks for watching!</p>
        </div>

    """
    # Only the HTML half of the (html, warnings) result is of interest.
    return ReST2HTML_w_warnings(source_string)[0]
394
395
def attrs_to_fields(cls, omit=[]):
    """Turn the schema fields of a class into `FieldProperty` attributes.

    The fields are taken from the first interface in the list of
    interfaces implemented by `cls`. For each field name a
    `FieldProperty` bound to the respective field is set as class
    attribute of same name.

    With Python >= 2.6 we can even use this function as a class decorator.

    `omit` is a list of field names that should _not_ be turned into
    field properties. This is useful for properties and the like.

    Returns `cls`.
    """
    schema = list(implementedBy(cls))[0]
    for name in getFieldNames(schema):
        if name not in omit:
            field = schema[name]
            prop = FieldProperty(field)
            # Set proper docstring for the API docs.
            prop.__doc__ = field.title + ' (computed attribute)'
            setattr(cls, name, prop)
    return cls
415
416
def get_current_principal():
    """Get the 'current' principal.

    The principal is taken from the first participation of the
    current interaction, so this works without a request. Examining
    a request is the regular (and recommended) way to get a principal
    involved 'currently'.

    Use this method only if you really have no access to the current
    request.

    Returns ``None`` when no principal is involved, i.e. if there is
    no interaction or the interaction has no participations (for
    instance during tests).
    """
    try:
        participations = getInteraction().participations
        return participations[0].principal
    except (NoInteraction, IndexError):
        # No interaction at all or no participations present.
        return None
437
438
def cmp_files(file_descr1, file_descr2):
    """Compare the contents of two file-like objects.

    Both objects are rewound to their start and then read in chunks
    of `BUFSIZE`.

    Returns ``True`` if both are equal, ``False`` otherwise.
    """
    for handle in (file_descr1, file_descr2):
        handle.seek(0)
    chunk1 = file_descr1.read(BUFSIZE)
    chunk2 = file_descr2.read(BUFSIZE)
    while chunk1 == chunk2:
        if not chunk1:
            # Both exhausted at the same time without any difference.
            return True
        chunk1 = file_descr1.read(BUFSIZE)
        chunk2 = file_descr2.read(BUFSIZE)
    return False
453
454
def string_from_bytes(number):
    """Turn a number into some textual representation.

    Numbers below 1024 are given in bytes, numbers below 1024**2 in
    whole (rounded down) KB. Anything bigger is given in MB or GB
    with two decimal places, regardless of whether `number` is an
    integer or a float.

    Examples:

       >>> string_from_bytes(1)
       u'1 byte(s)'

       >>> string_from_bytes(1025)
       u'1 KB'

       >>> string_from_bytes(1.5 * 1024*1024)
       u'1.50 MB'

       >>> string_from_bytes(1536 * 1024)
       u'1.50 MB'

       >>> string_from_bytes(673.286 * 1024**3)
       u'673.29 GB'

    """
    if number < 1024:
        return u'%s byte(s)' % (str(number),)
    if number < 1024 ** 2:
        # Explicit floor division: whole KB for integers and floats.
        return u'%d KB' % (number // 1024,)
    # Explicit float division: integer input must not lose its
    # fractional part before being formatted.
    if number < 1024 ** 3:
        return u'%.2f MB' % (number / float(1024 ** 2),)
    return u'%.2f GB' % (number / float(1024 ** 3),)
480
481
def file_size(file_like_obj):
    """Determine file size in most effective manner.

    Returns the number of bytes in a file. This function works for
    both, real files as well as file-like objects like cStringIO based
    'files'.

    Objects providing a usable file descriptor are examined with
    :func:`os.fstat`. For all other objects (those without a
    ``fileno`` method and those whose ``fileno`` method refuses to
    work, like in-memory files from :mod:`io`) we seek to the end and
    ask for the position.

    Example:

      >>> from cStringIO import StringIO
      >>> file_size(StringIO('my file content'))
      15

    Please note that this function expects the file-like object passed
    in to be at first reading position (it does no seek(0)) and that
    when finished the file pointer might be at end of file.
    """
    try:
        descriptor = file_like_obj.fileno()
    except (AttributeError, IOError, OSError, ValueError):
        # No `fileno` at all or an in-memory file: `io` objects do
        # provide the method, but it raises `io.UnsupportedOperation`
        # (an IOError/OSError and ValueError subclass).
        descriptor = None
    if descriptor is not None:
        return os.fstat(descriptor).st_size
    file_like_obj.seek(0, 2)  # seek to last position in file
    return file_like_obj.tell()
503
504
def get_user_account(request):
    """Return local user account.

    The id of the request's principal is passed to ``getAccount`` of
    the authenticator plugin registered under the name ``users``.
    """
    principal_id = request.principal.id
    users = getUtility(IAuthenticatorPlugin, name='users')
    return users.getAccount(principal_id)
512
513
def iface_names(iface, omit=[], exclude_attribs=True, exclude_methods=True):
    """Get all attribute names of an interface.

    The base interfaces of `iface` are searched as well
    (recursively).

    By default names are skipped whose description is a pure
    attribute (i.e. exactly of class zope.interface.Attribute) or a
    method. Set `exclude_attribs` or `exclude_methods` to false to
    include them.

    Names of typical fields derived from zope.schema are included.

    The `omit` parameter can give a list of names to exclude.

    Returns an unsorted list of strings, each name contained once.
    """
    # Collect all interfaces (also bases): add the bases of everything
    # known so far until a round brings up nothing new.
    known = set((iface,))
    while True:
        grown = set(known)
        for candidate in known:
            grown = grown | set(candidate.getBases())
        if grown == known:
            break
        known = grown
    # Collect (filtered) names of collected interfaces
    names = []
    for candidate in known:
        for name, descr in candidate.namesAndDescriptions():
            if name in omit:
                continue
            # Identity check on purpose: only plain attributes count
            # here, not instances of Attribute subclasses.
            if exclude_attribs and descr.__class__ is Attribute:
                continue
            if exclude_methods and isinstance(descr, Method):
                continue
            if name not in names:
                names.append(name)
    return names
553
554
def get_sorted_preferred(tuples_iterable, preferred_list):
    """Get a list of tuples (<TITLE>,<TOKEN>) with values in
    `preferred_list` put in front.

    The rest of the tuples iterable is returned in original order. This
    is useful for putting default entries on top of (already sorted)
    lists of choice values, for instance when sorting countries and
    their code.

    Sample:

    We have a list of tuples with uppercase 'titles' and lowercase
    'tokens'. This list is already sorted but we want certain values
    of this list to show up before other values. For instance we want
    to see the 'C' entry to come first.

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['c'])
      (('C', 'c'), ('A', 'a'), ('B', 'b'))

    i.e. the entry with 'c' as second value moved to head of result.

    We can also require multiple entries at head of list:

      >>> get_sorted_preferred([('A','a'), ('B','b'), ('C','c')],
      ...                       ['b', 'c'])
      (('B', 'b'), ('C', 'c'), ('A', 'a'))

    We required the 'b' entry to come before the 'c' entry and then
    the rest of the input list. That's what we got.

    Preferred values that do not occur in the tuples iterable are
    ignored:

      >>> get_sorted_preferred([('A','a'), ('B','b')], ['x', 'b'])
      (('B', 'b'), ('A', 'a'))

    The result is returned as a tuple of tuples to keep order of values.
    """
    # One slot per preferred value, filled when the value shows up.
    slots = [None for x in preferred_list]
    rest = []
    for title, code in tuples_iterable:
        if code in preferred_list:
            slots[preferred_list.index(code)] = (title, code)
        else:
            rest.append((title, code))
    # Slots of preferred values not found in the input are still
    # `None` and must not leak into the result.
    head = [entry for entry in slots if entry is not None]
    return tuple(head + rest)
596
597
def now(tz=None):
    """Get current datetime in timezone of `tz`.

    The current (naive) UTC time is determined and handed over to
    :func:`to_timezone` together with `tz`.

    If `tz`, a `tzinfo` instance, is None, UTC time is returned.

    `tz` should be a timezone as defined in pytz.
    """
    utc_now = datetime.datetime.utcnow()
    return to_timezone(utc_now, tz=tz)
606
607
def to_timezone(dt, tz=None):
    """Shift datetime into timezone `tz`.

    A `dt` without `tzinfo` (i.e. a 'naive' one) is assumed to be
    UTC.

    If no `tz` is given, shift to UTC is performed.

    If `dt` is not a datetime.datetime, the input value is returned
    unchanged.
    """
    if isinstance(dt, datetime.datetime):
        if tz is None:
            target = pytz.utc
        else:
            target = tz
        if dt.tzinfo is None:
            aware = pytz.utc.localize(dt)
        else:
            aware = dt
        # Normalize in the source timezone, move over to the target
        # timezone and normalize there again.
        shifted = aware.tzinfo.normalize(aware).astimezone(target)
        return target.normalize(shifted)
    return dt
626
627
def imghdr_test_fpm(h, f):
    """FPM fileformat test.

    The `fpm` fileformat is the binary fingerprint data as created by
    `libfprint`.

    Returns ``'fpm'`` if the header data `h` starts with ``FP1``,
    ``None`` otherwise. `f` is not examined.
    """
    if len(h) < 3:
        return None
    if h[:3] != 'FP1':
        return None
    return 'fpm'
636
637
#: Add test function in stdlib's imghdr tests. It is added only once,
#: so that executing this module again does not register a duplicate.
if imghdr_test_fpm not in imghdr.tests:
    imghdr.tests.append(imghdr_test_fpm)
640
641
def get_fileformat(path, bytestream=None):
    """Try to determine the file format of a given media file.

    Although checks done here are not done very thoroughly, they make
    no assumptions about the filetype by looking at its filename
    extension or similar. Instead they check header data to comply
    with common known rules (Magic Words).

    If bytestream is not `None` the `path` is ignored.

    Returns filetype as string (something like ``'jpg'``) if
    file-format can be recognized, ``None`` else. ``None`` is also
    returned if neither `path` nor `bytestream` is given. The names
    ``'jpeg'`` and ``'tiff'`` are turned into ``'jpg'`` and ``'tif'``
    respectively.

    Tested recognized filetypes currently are `jpg`, `png`, `fpm`, and
    `pdf`.

    More filetypes (though untested in waeup.kofa) are automatically
    recognized because we deploy the stdlib `imghdr` library. See this
    module's docs for a complete list of filetypes recognized.
    """
    if path is None and bytestream is None:
        return None

    if bytestream is None:
        detected = imghdr.what(path)
    else:
        detected = imghdr.what(path, bytestream)
    # Map the long names used by `imghdr` to the short ones we use.
    short_names = {'jpeg': 'jpg', 'tiff': 'tif'}
    return short_names.get(detected, detected)
674
675
def check_pdf(bytestream, file):
    """Tell whether a file or bytestream is a PDF file.

    If `file` is given, the first four bytes are read from it (the
    file is rewound before and after reading) and used instead of
    `bytestream`.

    Returns ``'pdf'`` if the examined data starts with ``%PDF``,
    ``None`` otherwise.

    Works as a test/plugin for the stdlib `imghdr` library.
    """
    if file is not None:
        file.seek(0)
        bytestream = file.read(4)
        file.seek(0)

    return 'pdf' if bytestream.startswith('%PDF') else None
689
# Register check_pdf as header check function with `imghdr`. The
# membership test keeps the function from being appended a second
# time if this module-level code is executed again.
if check_pdf not in imghdr.tests:
    imghdr.tests.append(check_pdf)
693
694
def _first_csv_row(path):
    """Return the first data row of CSV file `path` as dict.

    Returns an empty dict if the file provides no data row.
    """
    with open(path, 'rb') as fd:
        try:
            return next(csv.DictReader(fd))
        except StopIteration:
            return dict()


def merge_csv_files(path1, path2):
    """Merge two CSV files into one (appending).

    CSV data from `path2` will be merged into `path1` csv file. This
    is a bit like 'appending' data from path2 to data from path1.

    The path of the resulting temporary file will be returned.

    In the result file data from `path2` will always come _after_ data
    from `path1`. The columns of the result file are the sorted union
    of the column names found in the first data row of each file.

    Caution: It is the _callers_ responsibility to remove the
    result file (which is created by tempfile.mkstemp) after usage.

    This CSV file merging copes with different column orders in both
    CSV files and even with different column sets in both files.

    Also broken/empty CSV files can be handled.

    All files opened here are closed again before returning, so the
    result file is completely written when its path is returned.
    """
    # sniff the col names
    row10 = _first_csv_row(path1)
    row20 = _first_csv_row(path2)
    fieldnames = sorted(set(list(row10.keys()) + list(row20.keys())))
    # now read/write the real data
    wp, tmp_path = tempfile.mkstemp()
    with os.fdopen(wp, 'wb') as out:
        writer = csv.DictWriter(out, fieldnames)
        writer.writerow(dict((x, x) for x in fieldnames))  # header
        for path in (path1, path2):
            with open(path, 'rb') as src:
                for row in csv.DictReader(src):
                    writer.writerow(row)
    return tmp_path
735
736
def product(sequence, start=1):
    """Multiply `start` with all items of `sequence`.

    The items are expected to be numbers (_not_ strings). `start`
    defaults to 1.

    For an empty sequence 0 is returned, regardless of `start`.
    """
    if len(sequence) == 0:
        return 0
    total = start
    for factor in sequence:
        total *= factor
    return total
748
749
class NullHandler(logging.Handler):
    """A logging handler that ignores every record.

    Does not log anything. Useful if you want to shut up a log.

    Defined here for backwards compatibility with Python < 2.7.
    """
    def emit(self, record):
        """Drop `record` silently."""
        return None
759
760
def check_csv_charset(iterable):
    """Check contents of `iterable` regarding valid CSV encoding and
    trailing whitespaces in data.

    `iterable` is expected to be an iterable on _rows_ (not
    chars). This is true for instance for
    filehandlers. `zope.publisher.browser.FileUpload` instances are
    _not_ iterable, unfortunately.

    Returns line num of first illegal char or ``None``. Line nums
    start counting with 1 (not zero). Returns -1 if data contain
    trailing whitespaces.

    The line num returned is derived from a counter that starts with
    1 and is increased for each row read successfully: a
    `UnicodeDecodeError` gives the counter itself, any other
    exception the counter plus one.
    """
    linenum = 1
    try:
        reader = csv.DictReader(iterable)
        for row in reader:
            linenum += 1
            for value in row.values():
                if value.endswith(' '):
                    return -1
    except UnicodeDecodeError:
        return linenum
    except Exception:
        # Other problems with the data (for instance rows with
        # missing values). We do not catch `KeyboardInterrupt` or
        # `SystemExit` here: these are no data problems.
        return linenum + 1
    return None
787
788
class _MemInfoMissing(AttributeError, KeyError):
    """Raised by `MemInfo` for attribute access to a missing item.

    It is an `AttributeError`, as required for attribute lookups by
    `getattr` (with default) and `hasattr`. It is a `KeyError` as
    well, so that code catching the `KeyError` raised by former
    versions keeps working.
    """


class MemInfo(dict):
    """A dict with access to its items like if they are attributes.

    Getting, setting and deleting an attribute gets, sets or deletes
    the item of same name. Asking for (or deleting) a missing
    attribute raises an exception that is both, an `AttributeError`
    and a `KeyError`.
    """
    def __getattr__(self, name):
        try:
            return dict.__getitem__(self, name)
        except KeyError:
            raise _MemInfoMissing(name)

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            dict.__delitem__(self, name)
        except KeyError:
            raise _MemInfoMissing(name)
795
796
def get_meminfo(src="/proc/meminfo"):
    """Get local memory info as provided in /proc/meminfo.

    Entries in /proc/meminfo are available as MemInfo attributes.
    Each line is split at the first colon into a key and a value.
    The value is stored as integer, a trailing `` kB`` unit is
    dropped. Empty lines are skipped.

    By default we lookup a file /proc/meminfo. Another path can be
    passed in as `src` parameter. In this case `src` must be a
    regular file and contain meminfo-style data.

    If the given `src` (or `/proc/meminfo`) are not available, `None`
    is returned.
    """
    if not os.path.isfile(src):
        return None
    with open(src, 'r') as fd:
        lines = fd.read().splitlines()
    result = MemInfo()
    for line in lines:
        if not line.strip():
            continue  # tolerate empty lines, e.g. at end of file
        key, value = line.split(':', 1)
        result[key] = int(value.split(' kB', 1)[0])
    return result
818
def html2dict(value=None, portal_language='en'):
    """Transforms a localized HTML text string into a dictionary.

    Different languages must be separated by ``>>xy<<`` whereas
    xy is the language code. Text parts without correct leading
    language separator - usually the first part has no language
    descriptor - are interpreted as texts in the portal's language.

    Returns a dict with language codes as keys. The values are the
    respective texts, each rendered by `renderElement` as contents
    of a ``div id="html"`` element. Values that cannot be split
    (like ``None``) result in an empty dict.
    """
    try:
        parts = value.split('>>')
    except Exception:
        # `None`, numbers and the like carry no text.
        return {}
    elements = {}
    lang = portal_language
    for part in parts:
        if part[2:4] == u'<<':
            # Part starts with a language descriptor: ``xy<<text``.
            lang = str(part[0:2].lower())
            text = part[4:]
        else:
            # No descriptor: `lang` keeps the value it had before.
            text = part
        elements[lang] = renderElement(u'div id="html"',
                                       contents=text)
    return elements
844
def rest2dict(value=None, portal_language='en'):
    """Transforms a localized REST text string into a dictionary.

    Different languages must be separated by ``>>xy<<`` whereas
    xy is the language code. Text parts without correct leading
    language separator - usually the first part has no language
    descriptor - are interpreted as texts in the portal's language.

    Returns a dict with language codes as keys. The values are the
    respective texts, each turned into HTML by :func:`ReST2HTML` and
    rendered by `renderElement` as contents of a ``div id="rest"``
    element. Values that cannot be split (like ``None``) result in an
    empty dict.
    """
    try:
        parts = value.split('>>')
    except Exception:
        # `None`, numbers and the like carry no text.
        return {}
    elements = {}
    lang = portal_language
    for part in parts:
        if part[2:4] == u'<<':
            # Part starts with a language descriptor: ``xy<<text``.
            lang = str(part[0:2].lower())
            text = part[4:]
        else:
            # No descriptor: `lang` keeps the value it had before.
            text = part
        elements[lang] = renderElement(u'div id="rest"',
                                       contents=ReST2HTML(text))
    return elements
870
871
872
class FormVarParser(HTMLParser):
    """An HTML form parser that extracts keys and values.

    Fed with an HTML document, we look at each starttag and check
    whether it provides a `name` and a `value` attribute. If so, the
    value is stored (as unicode) under the name in the dict
    `form_vars` of this instance.
    """

    def __init__(self):
        # old-style class - no super()
        HTMLParser.__init__(self)
        self.form_vars = {}

    def handle_starttag(self, tag, attrs):
        attributes = dict(attrs)
        if 'name' not in attributes or 'value' not in attributes:
            return
        self.form_vars[attributes['name']] = unicode(attributes['value'])
892
893
def extract_formvars(html_code):
    """Extract keys and values from an HTML form as dict.

    The `html_code` is fed to a fresh :class:`FormVarParser`, whose
    collected `form_vars` are returned.

    No text, no values::

      >>> extract_formvars("")
      {}

    Simple input tags normally provide name and value::

      >>> extract_formvars("<input type='text' name='foo' value='bar'>")
      {'foo': u'bar'}

    The sample doc we stored in tests is a bit more difficult::

      >>> html_path = os.path.join(os.path.dirname(__file__),
      ...                          'tests', 'sample_response.html')
      >>> html_code = open(html_path, 'r').read()
      >>> import pprint
      >>> pprint.pprint(extract_formvars(html_code))
      {'AMOUNT': u'100',
      ...
      'TRANS_NUM': u'01ESA20190916134824YA3YJ8'}

    """
    form_parser = FormVarParser()
    form_parser.feed(html_code)
    return form_parser.form_vars
923
924
def get_catalog_docids(cat):
    """Get all docids for a given catalog `cat`.

    Catalogs store the ids of objects they index. Get all of these
    object ids. For each index of the catalog we try the keys of its
    `_rev_index` first (FieldIndexes) and fall back to the keys of
    `index._docwords` (TextIndexes). So this function works at least
    for catalogs that provide field- and text indexes only.

    Returns the ids found as a set.
    """
    docids = set()
    for index in cat.values():
        try:
            # FieldIndexes
            found = list(index._rev_index.keys())
        except AttributeError:
            # TextIndexes
            found = list(index.index._docwords.keys())
        docids.update(found)
    return docids
941
942
def reindex_cat(cat):
    """Reindex all objects stored in a catalog `cat`.

    Regular catalogs try to reindex all stored object ids of a ZODB
    when asked to reindex all contents. That can be overkill. This
    function reindexes only those objects, that were already stored in
    a catalog. It was tested for catalogs with at least 650000
    objects.

    The transaction is committed in the first round and then in every
    5000th round. There is no commit after the last round, so changes
    done after the last commit are left to the caller.

    Progress messages and the time needed are printed to stdout.

    Please note, that reindexing catalogs can take a considerable
    amount of time. 100.000 objects took about 12 minutes to reindex
    on a 16 GB machine.
    """
    started = datetime.datetime.now()
    print("Collecting doc ids...")
    intids = getUtility(IIntIds, context=cat)
    docids = get_catalog_docids(cat)
    print("Found %s entries..." % len(docids))
    for count, docid in enumerate(docids):
        cat.index_doc(docid, intids.getObject(docid))
        # indexes can become huge. commit changes every 5000th round to
        # keep the memory footprint of catalogs `updateIndex` manageable
        if count % 5000 == 0:
            transaction.commit()
    finished = datetime.datetime.now()
    print("Finished. %s" % (finished - started))

Note: See TracBrowser for help on using the repository browser.

Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py @ 16472

Download in other formats: