Context navigation

batching.py @ 16231

Last change on this file since 16231 was 16012, checked in by Henrik Bettermann, 5 years ago
Implement the `BatchProcessor.checkCreateRequirements` and `BatchProcessor.checkUpdateRequirements` methods, mainly to protect the course result lists of graduated students.
Property svn:keywords set to `Id`
File size: 35.1 KB

Rev	Line
[7196]	1	## $Id: batching.py 16012 2020-02-24 21:26:35Z henrik $
	2	##
	3	## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
[7819]	18	"""Kofa components for batch processing.
[4806]	19
	20	Batch processors eat CSV files to add, update or remove large numbers
	21	of certain kinds of objects at once.
	22	"""
	23	import grok
[8380]	24	import datetime
[4821]	25	import os
[9217]	26	import shutil
[4900]	27	import tempfile
[4821]	28	import time
[10027]	29	import unicodecsv
[9816]	30	import zc.async.interfaces
[7859]	31	from cStringIO import StringIO
[9217]	32	from persistent.list import PersistentList
	33	from zope.component import createObject, getUtility
	34	from zope.component.hooks import setSite
[9726]	35	from zope.interface import Interface, implementer
[4806]	36	from zope.schema import getFields
[14552]	37	from zope.schema.interfaces import ConstraintNotSatisfied, RequiredMissing
[8332]	38	from zope.event import notify
[9217]	39	from waeup.kofa.async import AsyncJob
[7811]	40	from waeup.kofa.interfaces import (
[9217]	41	IBatchProcessor, FatalCSVError, IObjectConverter, IJobManager,
	42	ICSVExporter, IGNORE_MARKER, DuplicationError, JOB_STATUS_MAP,
[9726]	43	IExportJobContainer, IExportJob, IExportContainerFinder)
[4806]	44
	45	class BatchProcessor(grok.GlobalUtility):
	46	"""A processor to add, update, or remove data.
	47
	48	This is a non-active baseclass.
	49	"""
[8220]	50	grok.implements(IBatchProcessor)
[4806]	51	grok.context(Interface)
	52	grok.baseclass()
	53
	54	# Name used in pages and forms...
[7933]	55	name = u'Non-registered base processor'
[6259]	56
[4806]	57	# Internal name...
[12869]	58	util_name = ''
[6259]	59
[4806]	60	# Items for this processor need an interface with zope.schema fields.
[5009]	61	iface = Interface
[6259]	62
[4806]	63	# The name must be the same as the util_name attribute in order to
	64	# register this utility correctly.
	65	grok.name(util_name)
	66
	67	# Headers needed to locate items...
[12869]	68	location_fields = []
[6259]	69
[4806]	70	# A factory with this name must be registered...
[12869]	71	factory_name = ''
[4806]	72
	73	@property
	74	def required_fields(self):
[4829]	75	"""Required fields that have no default.
	76
	77	A list of names of field, whose value cannot be set if not
	78	given during creation. Therefore these fields must exist in
	79	input.
	80
	81	Fields with a default != missing_value do not belong to this
	82	category.
	83	"""
[4806]	84	result = []
	85	for key, field in getFields(self.iface).items():
	86	if key in self.location_fields:
	87	continue
[4829]	88	if field.default is not field.missing_value:
	89	continue
[4806]	90	if field.required:
	91	result.append(key)
	92	return result
[6259]	93
[4806]	94	@property
	95	def req(self):
	96	result = dict(
	97	create = self.location_fields + self.required_fields,
	98	update = self.location_fields,
	99	remove = self.location_fields,
	100	)
	101	return result
	102
	103	@property
	104	def available_fields(self):
	105	return sorted(list(set(
	106	self.location_fields + getFields(self.iface).keys())))
[6259]	107
[4806]	108	def getHeaders(self, mode='create'):
	109	return self.available_fields
	110
	111	def checkHeaders(self, headerfields, mode='create'):
	112	req = self.req[mode]
	113	# Check for required fields...
	114	for field in req:
	115	if not field in headerfields:
	116	raise FatalCSVError(
	117	"Need at least columns %s for import!" %
	118	', '.join(["'%s'" % x for x in req]))
[6828]	119	# Check for double fields. Cannot happen because this error is
	120	# already catched in views
[4806]	121	not_ignored_fields = [x for x in headerfields
	122	if not x.startswith('--')]
	123	if len(set(not_ignored_fields)) < len(not_ignored_fields):
	124	raise FatalCSVError(
	125	"Double headers: each column name may only appear once.")
	126	return True
	127
	128	def applyMapping(self, row, mapping):
[4811]	129	"""Apply mapping to a row of CSV data.
	130	"""
[4806]	131	result = dict()
	132	for key, replacement in mapping.items():
[6824]	133	if replacement == u'--IGNORE--':
	134	# Skip ignored columns in failed and finished data files.
	135	continue
[4806]	136	result[replacement] = row[key]
	137	return result
[6259]	138
[4832]	139	def getMapping(self, path, headerfields, mode):
[6824]	140	"""Get a mapping from CSV file headerfields to actually used fieldnames.
	141
[4811]	142	"""
[4832]	143	result = dict()
[10027]	144	reader = unicodecsv.reader(open(path, 'rb'))
[4806]	145	raw_header = reader.next()
[4832]	146	for num, field in enumerate(headerfields):
	147	if field not in self.location_fields and mode == 'remove':
[6824]	148	# Skip non-location fields when removing.
	149	continue
	150	if field == u'--IGNORE--':
	151	# Skip ignored columns in failed and finished data files.
	152	continue
[4832]	153	result[raw_header[num]] = field
	154	return result
[4806]	155
[6273]	156	def stringFromErrs(self, errors, inv_errors):
	157	result = []
	158	for err in errors:
	159	fieldname, message = err
	160	result.append("%s: %s" % (fieldname, message))
	161	for err in inv_errors:
	162	result.append("invariant: %s" % err)
	163	return '; '.join(result)
	164
[4806]	165	def callFactory(self, args, *kw):
	166	return createObject(self.factory_name)
	167
	168	def parentsExist(self, row, site):
[4811]	169	"""Tell whether the parent object for data in ``row`` exists.
	170	"""
[4806]	171	raise NotImplementedError('method not implemented')
	172
	173	def entryExists(self, row, site):
[4811]	174	"""Tell whether there already exists an entry for ``row`` data.
	175	"""
[4806]	176	raise NotImplementedError('method not implemented')
	177
	178	def getParent(self, row, site):
[4811]	179	"""Get the parent object for the entry in ``row``.
	180	"""
[4806]	181	raise NotImplementedError('method not implemented')
[6259]	182
[5009]	183	def getEntry(self, row, site):
[12513]	184	"""Get the object for the entry in ``row``.
[5009]	185	"""
	186	raise NotImplementedError('method not implemented')
[6259]	187
[4806]	188	def addEntry(self, obj, row, site):
[4811]	189	"""Add the entry given given by ``row`` data.
	190	"""
[4806]	191	raise NotImplementedError('method not implemented')
	192
	193	def delEntry(self, row, site):
[4811]	194	"""Delete entry given by ``row`` data.
	195	"""
[6259]	196	raise NotImplementedError('method not implemented')
[4806]	197
[16012]	198	def checkCreateRequirements(self, parent, row, site):
	199	"""Checks requirements the parent object must fulfill when
	200	a new subobject is being created.
	201
	202	This method is not used in case of updating or removing objects.
	203
	204	Returns error messages as strings in case of requirement
	205	problems.
	206	"""
	207	return None
	208
[7950]	209	def checkUpdateRequirements(self, obj, row, site):
	210	"""Checks requirements the object must fulfill when being updated.
[7938]	211
	212	This method is not used in case of deleting or adding objects.
	213
[7950]	214	Returns error messages as strings in case of requirement
[7938]	215	problems.
[7937]	216	"""
[7938]	217	return None
[7937]	218
[16012]	219
	220	def checkRemoveRequirements(self, obj, row, site):
	221	"""Checks requirements the object must fulfill when being removed.
	222
	223	This method is not used in case of updating or adding objects.
	224
	225	Returns error messages as strings in case of requirement
	226	problems.
	227	"""
	228	return None
	229
[9706]	230	def updateEntry(self, obj, row, site, filename):
[4984]	231	"""Update obj to the values given in row.
[8220]	232
	233	Returns a string describing the fields changed.
[4984]	234	"""
[8220]	235	changed = []
[4829]	236	for key, value in row.items():
[8220]	237	# Skip fields to be ignored.
	238	if value == IGNORE_MARKER:
	239	continue
[8304]	240	# Skip fields not declared in interface and which are
	241	# not yet attributes of existing objects. We can thus not
	242	# add non-existing attributes here.
[8220]	243	if not hasattr(obj, key):
	244	continue
[13159]	245	# DefaultObjectConverter.fromStringDict fails for
	246	# list-of-choices fields because we are using a different
	247	# widget for this combination. Thus the ListFieldConverter
	248	# returns a useless dictionary which causes getWidgetsData to
	249	# skip the field. The value in row remains unchanged.
	250	# We have to evaluate the string and replace the value here.
[9265]	251	try:
[13159]	252	evalvalue = eval(value)
	253	if isinstance(evalvalue, list):
	254	value = evalvalue
	255	except:
	256	pass
	257	try:
[9265]	258	setattr(obj, key, value)
	259	except AttributeError:
	260	# Computed attributes can't be set.
	261	continue
[8222]	262	log_value = getattr(value, 'code', value)
	263	changed.append('%s=%s' % (key, log_value))
[8332]	264
[8333]	265	# If any catalog is involved it must be updated.
	266	#
	267	# XXX: The event is also triggered when creating objects as
	268	# updateEntry is called also when creating entries resulting
	269	# in objectAdded and additional objectModified events.
	270	if len(changed):
	271	notify(grok.ObjectModifiedEvent(obj))
[8332]	272
[8220]	273	return ', '.join(changed)
[4821]	274
[4832]	275	def createLogfile(self, path, fail_path, num, warnings, mode, user,
[4885]	276	timedelta, logger=None):
	277	"""Write to log file.
[4821]	278	"""
[4885]	279	if logger is None:
	280	return
[9739]	281	logger.info(
	282	"processed: %s, %s mode, %s lines (%s successful/ %s failed), "
	283	"%0.3f s (%0.4f s/item)" % (
	284	path, mode, num, num - warnings, warnings,
	285	timedelta, timedelta/(num or 1)))
[4821]	286	return
[4877]	287
	288	def writeFailedRow(self, writer, row, warnings):
	289	"""Write a row with error messages to error CSV.
	290
	291	If warnings is a list of strings, they will be concatenated.
	292	"""
	293	error_col = warnings
	294	if isinstance(warnings, list):
	295	error_col = ' / '.join(warnings)
	296	row['--ERRORS--'] = error_col
	297	writer.writerow(row)
	298	return
[6259]	299
[8220]	300	def checkConversion(self, row, mode='ignore', ignore_empty=True):
[6847]	301	"""Validates all values in row.
	302	"""
	303	converter = IObjectConverter(self.iface)
	304	errs, inv_errs, conv_dict = converter.fromStringDict(
[8220]	305	row, self.factory_name, mode=mode)
[6847]	306	return errs, inv_errs, conv_dict
	307
[12810]	308
	309	def emptyRow(self, row):
	310	"""Detect empty rows.
	311	"""
	312	for value in row.values():
[14424]	313	if not value in (None, IGNORE_MARKER) and value.strip():
[12810]	314	return False
	315	return True
	316
[4885]	317	def doImport(self, path, headerfields, mode='create', user='Unknown',
[8220]	318	logger=None, ignore_empty=True):
[12869]	319	"""In contrast to most other methods, `doImport` is not supposed to
[12867]	320	be customized, neither in custom packages nor in derived batch
	321	processor classes. Therefore, this is the only place where we
	322	do import data.
	323
	324	Before this method starts creating or updating persistent data, it
	325	prepares two more files in a temporary folder of the filesystem: (1)
	326	a file for pending data with file extension ``.pending`` and (2)
	327	a file for successfully processed data with file extension
	328	``.finished``. Then the method starts iterating over all rows of
	329	the CSV file. Each row is treated as follows:
	330
	331	1. An empty row is skipped.
	332
[12997]	333	2. Empty strings or lists (``[]``) in the row are replaced by
	334	ignore markers.
[12867]	335
[12868]	336	3. The `BatchProcessor.checkConversion` method validates and converts
	337	all values in the row. Conversion means the transformation of strings
	338	into Python objects. For instance, number expressions have to be
	339	transformed into integers, dates into datetime objects, phone number
	340	expressions into phone number objects, etc. The converter returns a
	341	dictionary with converted values or, if the validation of one of the
	342	elements fails, an appropriate warning message. If the conversion
	343	fails a pending record is created and stored in the pending data file
	344	together with a warning message the converter has raised.
[12867]	345
[12868]	346	4. In create mode only:
[12867]	347
[12868]	348	The parent object must be found and a child
	349	object with same object id must not exist. Otherwise the row
	350	is skipped, a corresponding warning message is raised and a
	351	record is stored in the pending data file.
[12867]	352
[16012]	353	The `BatchProcessor.checkCreateRequirements` method checks additional
	354	requirements the parent object must fulfill before a new sububject
	355	is being added. These requirements are not imposed by the data
	356	type but the context of the object. For example, the course results
	357	of graduated students must not changed by import, neither by
	358	creating nor updating or removing course tickets.
	359
[12869]	360	Now `doImport` tries to add the new object with the data
[12868]	361	from the conversion dictionary. In some cases this
[12869]	362	may fail and a `DuplicationError` is raised. For example, a new
[12868]	363	payment ticket is created but the same payment for same session
	364	has already been made. In this case the object id is unique, no
	365	other object with same id exists, but making the 'same' payment
	366	twice does not make sense. The import is skipped and a
	367	record is stored in the pending data file.
[12867]	368
[12868]	369	5. In update mode only:
	370
	371	If the object can't be found, the row is skipped,
	372	a ``no such entry`` warning message is raised and a record is
	373	stored in the pending data file.
	374
	375	The `BatchProcessor.checkUpdateRequirements` method checks additional
	376	requirements the object must fulfill before being updated. These
	377	requirements are not imposed by the data type but the context
	378	of the object. For example, post-graduate students have a different
	379	registration workflow. With this method we do forbid certain workflow
	380	transitions or states.
	381
[12869]	382	Finally, `doImport` updates the existing object with the data
[12868]	383	from the conversion dictionary.
	384
	385	6. In remove mode only:
	386
	387	If the object can't be found, the row is skipped,
	388	a ``no such entry`` warning message is raised and a record is
	389	stored in the pending data file.
	390
[16012]	391	The `BatchProcessor.checkRemoveRequirements` method checks additional
	392	requirements the object must fulfill before being removed.
	393	These requirements are not imposed by the data type but the context
	394	of the object. For example, the course results of graduated students
	395	must not changed by import, neither by creating nor updating or
	396	removing course tickets.
	397
[12869]	398	Finally, `doImport` removes the existing object.
[12868]	399
[4811]	400	"""
[4832]	401	time_start = time.time()
[4806]	402	self.checkHeaders(headerfields, mode)
[4832]	403	mapping = self.getMapping(path, headerfields, mode)
[10027]	404	reader = unicodecsv.DictReader(open(path, 'rb'))
[4889]	405
[4900]	406	temp_dir = tempfile.mkdtemp()
[6259]	407
[6273]	408	base = os.path.basename(path)
	409	(base, ext) = os.path.splitext(base)
[4900]	410	failed_path = os.path.join(temp_dir, "%s.pending%s" % (base, ext))
[6831]	411	failed_headers = mapping.values()
[4877]	412	failed_headers.append('--ERRORS--')
[10027]	413	failed_writer = unicodecsv.DictWriter(open(failed_path, 'wb'),
	414	failed_headers)
[8573]	415	os.chmod(failed_path, 0664)
[6831]	416	failed_writer.writerow(dict([(x,x) for x in failed_headers]))
[4891]	417
[4900]	418	finished_path = os.path.join(temp_dir, "%s.finished%s" % (base, ext))
[6831]	419	finished_headers = mapping.values()
[10027]	420	finished_writer = unicodecsv.DictWriter(open(finished_path, 'wb'),
	421	finished_headers)
[8905]	422	os.chmod(finished_path, 0664)
[4891]	423	finished_writer.writerow(dict([(x,x) for x in finished_headers]))
[6259]	424
[4806]	425	num =0
[4878]	426	num_warns = 0
[4806]	427	site = grok.getSite()
[7859]	428
[4806]	429	for raw_row in reader:
	430	num += 1
[12810]	431	# Skip row if empty
	432	if self.emptyRow(raw_row):
	433	continue
[4806]	434	string_row = self.applyMapping(raw_row, mapping)
[12981]	435	if ignore_empty:
	436	# Replace empty strings and empty lists with ignore-markers
[8222]	437	for key, val in string_row.items():
[12981]	438	if val == '' or val == '[]':
[8222]	439	string_row[key] = IGNORE_MARKER
	440	row = dict(string_row.items()) # create deep copy
[6847]	441	errs, inv_errs, conv_dict = self.checkConversion(string_row, mode)
[6273]	442	if errs or inv_errs:
[4878]	443	num_warns += 1
[6273]	444	conv_warnings = self.stringFromErrs(errs, inv_errs)
	445	self.writeFailedRow(
[6824]	446	failed_writer, string_row, conv_warnings)
[4821]	447	continue
[6273]	448	row.update(conv_dict)
[6259]	449
[4806]	450	if mode == 'create':
	451	if not self.parentsExist(row, site):
[4878]	452	num_warns += 1
[4877]	453	self.writeFailedRow(
[6824]	454	failed_writer, string_row,
[12868]	455	"Not all parents do exist yet.")
[4806]	456	continue
	457	if self.entryExists(row, site):
[4878]	458	num_warns += 1
[4877]	459	self.writeFailedRow(
[6824]	460	failed_writer, string_row,
[12868]	461	"This object already exists.")
[4806]	462	continue
[16012]	463	parent = self.getParent(row, site)
	464	create_errors = self.checkCreateRequirements(parent, row, site)
	465	if create_errors is not None:
	466	num_warns += 1
	467	self.writeFailedRow(
	468	failed_writer, string_row, create_errors)
	469	continue
[4806]	470	obj = self.callFactory()
[7273]	471	# Override all values in row, also
	472	# student_ids and applicant_ids which have been
	473	# generated in the respective __init__ methods before.
[9706]	474	self.updateEntry(obj, row, site, base)
[6243]	475	try:
	476	self.addEntry(obj, row, site)
[6273]	477	except KeyError, error:
[6219]	478	num_warns += 1
	479	self.writeFailedRow(
[12868]	480	failed_writer, string_row, error.message)
[8540]	481	continue
[8509]	482	except DuplicationError, error:
	483	num_warns += 1
	484	self.writeFailedRow(
[12868]	485	failed_writer, string_row, error.msg)
[6219]	486	continue
[15065]	487	except FatalCSVError, error:
	488	num_warns += 1
	489	self.writeFailedRow(
	490	failed_writer, string_row, error.message)
	491	continue
[4806]	492	elif mode == 'remove':
	493	if not self.entryExists(row, site):
[4878]	494	num_warns += 1
[4877]	495	self.writeFailedRow(
[6824]	496	failed_writer, string_row,
[9219]	497	"Cannot remove: no such entry")
[4806]	498	continue
[16012]	499	obj = self.getEntry(row, site)
	500	remove_errors = self.checkRemoveRequirements(obj, row, site)
	501	if remove_errors is not None:
	502	num_warns += 1
	503	self.writeFailedRow(
	504	failed_writer, string_row, remove_errors)
	505	continue
[4806]	506	self.delEntry(row, site)
	507	elif mode == 'update':
	508	obj = self.getEntry(row, site)
	509	if obj is None:
[4878]	510	num_warns += 1
[4877]	511	self.writeFailedRow(
[6824]	512	failed_writer, string_row,
[9219]	513	"Cannot update: no such entry")
[4806]	514	continue
[7950]	515	update_errors = self.checkUpdateRequirements(obj, row, site)
[7938]	516	if update_errors is not None:
[7937]	517	num_warns += 1
	518	self.writeFailedRow(
	519	failed_writer, string_row, update_errors)
	520	continue
[11849]	521	try:
	522	self.updateEntry(obj, row, site, base)
	523	except ConstraintNotSatisfied, err:
	524	num_warns += 1
	525	self.writeFailedRow(
	526	failed_writer, string_row,
	527	"ConstraintNotSatisfied: %s" % err)
[14976]	528	continue
[14552]	529	except RequiredMissing, err:
	530	num_warns += 1
	531	self.writeFailedRow(
	532	failed_writer, string_row,
	533	"RequiredMissing: %s" % err)
[11849]	534	continue
[4891]	535	finished_writer.writerow(string_row)
[4821]	536
[4832]	537	time_end = time.time()
	538	timedelta = time_end - time_start
[6259]	539
[4878]	540	self.createLogfile(path, failed_path, num, num_warns, mode, user,
[4885]	541	timedelta, logger=logger)
[4894]	542	failed_path = os.path.abspath(failed_path)
[4878]	543	if num_warns == 0:
[4821]	544	del failed_writer
	545	os.unlink(failed_path)
[4894]	546	failed_path = None
	547	return (num, num_warns,
	548	os.path.abspath(finished_path), failed_path)
[7859]	549
[9032]	550	def get_csv_skeleton(self):
	551	"""Export CSV file only with a header of available fields.
	552
	553	A raw string with CSV data should be returned.
	554	"""
	555	outfile = StringIO()
[10027]	556	writer = unicodecsv.DictWriter(outfile, self.available_fields)
[9734]	557	writer.writerow(
	558	dict(zip(self.available_fields, self.available_fields))) # header
[9032]	559	outfile.seek(0)
	560	return outfile.read()
	561
class ExporterBase(object):
    """Shared foundation for CSV exporters.

    Provides value mangling and CSV writer handling. The data-gathering
    parts (`get_filtered`, `get_selected`, `export`, `export_all`)
    raise `NotImplementedError` here.
    """
    grok.implements(ICSVExporter)

    #: Fieldnames considered by this exporter
    fields = ('code', 'title', 'title_prefix')

    #: The title under which this exporter will be displayed
    #: (if registered as a utility)
    title = 'Override this title'

    def mangle_value(self, value, name, context=None):
        """Turn `value` into something a CSV writer can handle.

        Meant as a hook for derived classes; `name` and `context` are
        not evaluated by this base implementation.
        """
        if value is None:
            # None is not really representable in CSV files
            return ''
        if isinstance(value, bool):
            return '1' if value else '0'
        if isinstance(value, unicode):
            # CSV writers like byte streams better than unicode
            return value.encode('utf-8')
        if isinstance(value, datetime.date):
            # Covers datetimes as well, as datetimes are also dates and
            # both get the same treatment: a hash '#' is appended
            # (for datetimes since 2014-07-06, see ticket #941).
            return str('%s#' % value)
        return value

    def get_csv_writer(self, filepath=None):
        """Get a CSV dict writer instance open for writing.

        Returns a tuple (<writer>, <outfile>) where ``<writer>`` is a
        dict writer instance and outfile is the real file which is
        written to. The latter is important when writing to StringIO
        and can normally be ignored otherwise.

        The returned file will already be filled with the header row.

        Please note that if you give a filepath, the returned outfile
        is open for writing only and you might have to close it before
        reopening it for reading.
        """
        target = StringIO() if filepath is None else open(filepath, 'wb')
        csv_writer = unicodecsv.DictWriter(target, self.fields)
        header_row = dict(zip(self.fields, self.fields))
        csv_writer.writerow(header_row)
        return csv_writer, target

    def write_item(self, obj, writer):
        """Write a row extracted from `obj` into CSV file using `writer`.

        Attributes missing on `obj` are treated as ``None``.
        """
        writer.writerow(dict(
            (fieldname,
             self.mangle_value(getattr(obj, fieldname, None), fieldname, obj))
            for fieldname in self.fields))
        return

    def close_outfile(self, filepath, outfile):
        """Close outfile.

        If filepath is None, the contents of outfile is returned
        (and outfile is left open).
        """
        outfile.seek(0)
        if filepath is not None:
            outfile.close()
            return
        return outfile.read()

    def get_filtered(self, site, **kw):
        """Get datasets to export filtered by keyword arguments.

        Returns an iterable.
        """
        raise NotImplementedError

    def get_selected(self, site, selected):
        """Get datasets to export for selected items
        specified by a list of identifiers.

        Returns an iterable.
        """
        raise NotImplementedError

    def export(self, iterable, filepath=None):
        """Export `iterable` as CSV file.

        If `filepath` is ``None``, a raw string with CSV data should
        be returned.
        """
        raise NotImplementedError

    def export_all(self, site, filepath=None):
        """Export all appropriate objects in `site` into `filepath` as
        CSV data.

        If `filepath` is ``None``, a raw string with CSV data should
        be returned.
        """
        raise NotImplementedError

    def export_filtered(self, site, filepath=None, **kw):
        """Export items denoted by `kw`.

        If `filepath` is ``None``, a raw string with CSV data should
        be returned.
        """
        return self.export(self.get_filtered(site, **kw), filepath=filepath)

    def export_selected(self, site, filepath=None, **kw):
        """Export those items specified by a list of identifiers
        called `selected`.

        If `filepath` is ``None``, a raw string with CSV data should
        be returned.
        """
        identifiers = kw.get('selected', [])
        return self.export(
            self.get_selected(site, identifiers), filepath=filepath)
	684
def export_job(site, exporter_name, **kw):
    """Export all entries delivered by exporter and store it in a temp file.

    `site` gives the site to search. It will be passed to the exporter
    and also be set as 'current site' as the function is used in
    asynchronous jobs which run in their own threads and have no site
    set initially. Therefore `site` must also be a valid value for use
    with `zope.component.hooks.setSite()`.

    `exporter_name` is the utility name under which the desired
    exporter was registered with the ZCA.

    Keyword arguments select the kind of export: without any, all
    entries are exported; with a ``selected`` keyword, the exporter's
    `export_selected` is used; any other keywords are handed to
    `export_filtered`.

    The resulting CSV file will be stored in a new temporary directory
    (using :func:`tempfile.mkdtemp`). It will be named after the
    exporter used with `.csv` filename extension.

    Returns the path to the created CSV file.

    .. note:: It is the callers responsibility to clean up the used
              file and its parent directory.
    """
    setSite(site)
    exporter = getUtility(ICSVExporter, name=exporter_name)
    output_dir = tempfile.mkdtemp()
    filename = '%s.csv' % exporter_name
    output_path = os.path.join(output_dir, filename)
    if not kw:
        exporter.export_all(site, filepath=output_path)
    elif 'selected' in kw:
        # `in` replaces the deprecated dict.has_key().
        exporter.export_selected(site, filepath=output_path, **kw)
    else:
        exporter.export_filtered(site, filepath=output_path, **kw)
    return output_path
	718
	719	class AsyncExportJob(AsyncJob):
	720	"""An IJob that exports data to CSV files.
	721
	722	`AsyncExportJob` instances are regular `AsyncJob` instances with a
	723	different constructor API. Instead of a callable to execute, you
	724	must pass a `site` and some `exporter_name` to trigger an export.
	725
	726	The real work is done when an instance of this class is put into a
	727	queue. See :mod:`waeup.kofa.async` to learn more about
	728	asynchronous jobs.
	729
	730	The `exporter_name` must be the name under which an ICSVExporter
	731	utility was registered with the ZCA.
	732
	733	The `site` must be a valid site or ``None``.
	734
	735	The result of an `AsyncExportJob` is the path to generated CSV
	736	file. The file will reside in a temporary directory that should be
	737	removed after being used.
	738	"""
	739	grok.implements(IExportJob)
	740
[9718]	741	def __init__(self, site, exporter_name, args, *kwargs):
[9217]	742	super(AsyncExportJob, self).__init__(
[9718]	743	export_job, site, exporter_name, args, *kwargs)
[9217]	744
[9816]	745	@property
	746	def finished(self):
	747	"""A job is marked `finished` if it is completed.
	748
	749	Please note: a finished report job does not neccessarily
	750	provide an IReport result. See meth:`failed`.
	751	"""
	752	return self.status == zc.async.interfaces.COMPLETED
	753
	754	@property
	755	def failed(self):
	756	"""A report job is marked failed iff it is finished and the
	757	result is None.
	758
	759	While a job is unfinished, the `failed` status is ``None``.
	760
	761	Failed jobs normally provide a `traceback` to examine reasons.
	762	"""
	763	if not self.finished:
	764	return None
	765	if getattr(self, 'result', None) is None:
	766	return True
	767	return False
	768
class ExportJobContainer(object):
    """A mix-in that provides functionality for asynchronous export jobs.

    Jobs are tracked in `running_exports` as tuples
    ``(<job id>, <exporter name>, <user id>)``.
    """
    grok.implements(IExportJobContainer)
    running_exports = PersistentList()

    def start_export_job(self, exporter_name, user_id, *args, **kwargs):
        """Start asynchronous export job.

        `exporter_name` is the name of an exporter utility to be used.

        `user_id` is the ID of the user that triggers the export.

        The job_id is stored along with exporter name and user id in a
        persistent list.

        The method supports additional positional and keyword
        arguments, which are passed as-is to the respective
        :class:`AsyncExportJob`.

        Returns the job ID of the job started.
        """
        site = grok.getSite()
        manager = getUtility(IJobManager)
        job = AsyncExportJob(site, exporter_name, *args, **kwargs)
        job_id = manager.put(job)
        # Make sure that the persisted list is stored in ZODB: rebinding
        # gives this instance its own list instead of the class-level one.
        self.running_exports = PersistentList(self.running_exports)
        self.running_exports.append((job_id, exporter_name, user_id))
        return job_id

    def get_running_export_jobs(self, user_id=None):
        """Get export jobs for user with `user_id` as list of tuples.

        Each tuples holds ``<job_id>, <exporter_name>, <user_id>`` in
        that order. The ``<exporter_name>`` is the utility name of the
        used exporter.

        If `user_id` is ``None``, all running jobs are returned.

        Entries whose job is unknown to the job manager are dropped
        from `running_exports` as a side effect.
        """
        entries = []
        to_delete = []
        manager = getUtility(IJobManager)
        for entry in self.running_exports:
            if user_id is not None and entry[2] != user_id:
                continue
            if manager.get(entry[0]) is None:
                to_delete.append(entry)
                continue
            entries.append(entry)
        if to_delete:
            self.running_exports = PersistentList(
                [x for x in self.running_exports if x not in to_delete])
        return entries

    def get_export_jobs_status(self, user_id=None):
        """Get running/completed export jobs for `user_id` as list of tuples.

        Each tuple holds ``<raw status>, <status translated>,
        <exporter title>`` in that order, where ``<status
        translated>`` and ``<exporter title>`` are translated strings
        representing the status of the job and the human readable
        title of the exporter used.
        """
        entries = self.get_running_export_jobs(user_id)
        result = []
        manager = getUtility(IJobManager)
        for entry in entries:
            job = manager.get(entry[0])
            if job is None:
                continue
            status, status_translated = JOB_STATUS_MAP[job.status]
            exporter_title = getUtility(ICSVExporter, name=entry[1]).title
            result.append((status, status_translated, exporter_title))
        return result

    def delete_export_entry(self, entry):
        """Delete the export denoted by `entry`.

        Removes given entry from the local `running_exports` list and also
        removes the regarding job via the local job manager.

        `entry` must be a tuple ``(<job id>, <exporter name>, <user
        id>)`` as created by :meth:`start_export_job` or returned by
        :meth:`get_running_export_jobs`.
        """
        manager = getUtility(IJobManager)
        job = manager.get(entry[0])
        if job is not None:
            # remove created export file (together with its directory)
            if isinstance(job.result, basestring):
                if os.path.exists(os.path.dirname(job.result)):
                    shutil.rmtree(os.path.dirname(job.result))
        manager.remove(entry[0], self)
        new_entries = [x for x in self.running_exports
                       if x != entry]
        self.running_exports = PersistentList(new_entries)
        return

    def entry_from_job_id(self, job_id):
        """Get entry tuple for `job_id`.

        Returns ``None`` if no such entry can be found.
        """
        for entry in self.running_exports:
            if entry[0] == job_id:
                return entry
        return None
[9726]	877
	878	class VirtualExportJobContainer(ExportJobContainer):
	879	"""A virtual export job container.
	880
	881	Virtual ExportJobContainers can be used as a mixin just like real
	882	ExportJobContainer.
	883
	884	They retrieve and store data in the site-wide ExportJobContainer.
	885
	886	Functionality is currently entirely as for regular
	887	ExportJobContainers, except that data is stored elsewhere.
	888
	889	VirtualExportJobContainers need a registered
	890	IExportContainerFinder utility to find a suitable container for
	891	storing data.
	892	"""
	893	grok.implements(IExportJobContainer)
	894
	895	@property
	896	def _site_container(self):
	897	return getUtility(IExportContainerFinder)()
	898
	899	# The following is a simple trick. While ExportJobContainers store
	900	# only one attribute in ZODB, it is sufficient to replace this
	901	# attribute `running_exports` with a suitable manager to make the
	902	# whole virtual container work like the original but with the data
	903	# stored in the site-wide exports container. This way, virtual
	904	# export containers provide the whole functionality of a regular
	905	# exports container but store no data at all with themselves.
	906	@property
	907	def running_exports(self):
	908	"""Exports stored in the site-wide exports container.
	909	"""
	910	return self._site_container.running_exports
	911
	912	@running_exports.setter
	913	def running_exports(self, value):
	914	self._site_container.running_exports = value
	915
	916	@running_exports.deleter
	917	def running_exports(self):
	918	del self._site_container.running_exports
	919
[9823]	920	@property
	921	def logger(self):
	922	return self._site_container.logger
[9726]	923
	924	@implementer(IExportContainerFinder)
	925	class ExportContainerFinder(grok.GlobalUtility):
	926	"""Finder for local (site-wide) export container.
	927	"""
	928
	929	def __call__(self):
	930	"""Get the local export container-
	931
	932	If no site can be determined or the site provides no export
	933	container, None is returned.
	934	"""
	935	site = grok.getSite()
	936	if site is None:
	937	return None
	938	return site.get('datacenter', None)

Note: See TracBrowser for help on using the repository browser.

Context navigation

source: main/waeup.kofa/trunk/src/waeup/kofa/utils/batching.py @ 16231

Download in other formats: