## $Id: datacenter.py 9211 2012-09-21 08:19:35Z uli $
##
## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
"""Kofa data center.

The waeup data center cares for management of uploaded data and provides
tools for importing/exporting CSV data.
"""
import os
import re
import shutil
import grok
from datetime import datetime
from zope.component import getUtility
from zope.component.interfaces import ObjectEvent
from waeup.kofa.interfaces import (IDataCenter, IDataCenterFile,
                                   IDataCenterStorageMovedEvent,
                                   IDataCenterConfig)
from waeup.kofa.utils.batching import ExportJobContainer
from waeup.kofa.utils.helpers import copy_filesystem_tree, merge_csv_files
from waeup.kofa.utils.logger import Logger

#: Regular expression describing a logfile name with backup extension
RE_LOGFILE_BACKUP_NAME = re.compile(r'^.+\.\d+$')
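# For example, 'datacenter.log.1' or 'main.log.12' match, while a
# plain 'datacenter.log' does not.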

class DataCenter(grok.Container, Logger, ExportJobContainer):
    """A data center contains CSV files.
    """
    grok.implements(IDataCenter)

    logger_name = 'waeup.kofa.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        super(DataCenter, self).__init__(*args, **kw)
        self.storage = getUtility(IDataCenterConfig)['path']
        self._createSubDirs()
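    # Note: the storage path comes from the registered IDataCenterConfig
    # utility, e.g. {'path': '/kofa/datacenter'} (hypothetical path).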

    def _createSubDirs(self):
        """Create standard subdirs.
        """
        for name in ['finished', 'unfinished', 'logs', 'deleted']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)
        return

    @property
    def deleted_path(self):
        """Get the path for deleted object data.
        """
        return os.path.join(self.storage, 'deleted')

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Files are sorted by basename, or, if `sort` is set to 'date',
        by timestamp with newest files first.
        """
        result = []
        if not os.path.exists(self.storage):
            return result
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result
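    # Usage sketch (hypothetical storage content):
    #   datacenter.getFiles()             # DataCenterFile objects by name
    #   datacenter.getFiles(sort='date')  # newest files first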

    def getLogFiles(self, exclude_backups=True):
        """Get the files from the logs/ subdir. Files are sorted by name.

        By default backup logs ('app.log.1', etc.) are excluded.
        """
        result = []
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        for name in sorted(os.listdir(logdir)):
            if not os.path.isfile(os.path.join(logdir, name)):
                continue
            if exclude_backups and RE_LOGFILE_BACKUP_NAME.match(name):
                continue
            result.append(LogFile(os.path.join(logdir, name)))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        If `move` is True, the contents of the old storage are copied
        to the new location. Returns a list of paths that could not be
        copied.
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                              overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied
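    # Usage sketch (hypothetical target path):
    #   left_over = datacenter.setStoragePath('/tmp/new-storage', move=True)
    #   # `left_over` lists the files that could not be copied over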

    def _moveFile(self, source, dest):
        """Move file source to dest, preserving file stats (mode,
        atime, mtime).
        """
        if not os.path.exists(source):
            self.logger.warn('No such source path: %s' % source)
            return
        if source == dest:
            return
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def _appendCSVFile(self, source, dest):
        """Append data from CSV file `source` to data from CSV file `dest`.

        The `source` file is deleted afterwards.
        """
        if not os.path.exists(dest):
            return self._moveFile(source, dest)
        if not os.path.exists(source):
            self.logger.warn('No such source path: %s' % source)
            return
        if source == dest:
            return
        result_path = merge_csv_files(dest, source)
        self._moveFile(result_path, dest)
        os.unlink(source)
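    # Behavior sketch: if `dest` does not exist yet, `source` is simply
    # moved there; otherwise the two CSV files are merged via
    # merge_csv_files() and the merged result replaces `dest`.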

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into their final locations.

        ``successful`` is a boolean telling whether processing was
        successful.

        ``source_path``: path to the file that was processed.

        ``finished_file``, ``pending_file``: paths to the generated
        .finished and .pending files, respectively. The .pending file
        path may be ``None``.

        ``mode`` becomes part of the generated filenames.

        If the finished file is placed in a location outside the local
        storage dir, the complete directory is removed afterwards.
        Regular processors should therefore put their files into
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished files into their respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._appendCSVFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put the source file into its final location...
        finished_dest = os.path.join(finished_dir, basename)
        unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, finished_dest)
        elif successful and pending:
            self._moveFile(unfinished_dest, finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, unfinished_dest)

        # If the finished and pending files were created in a location
        # outside the datacenter storage, remove that directory.
        maybe_temp_dir = os.path.dirname(finished_file)
        if os.path.commonprefix(
            [self.storage, maybe_temp_dir]) != self.storage:
            shutil.rmtree(maybe_temp_dir)
        return
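    # Naming sketch: a source file 'students.csv' processed in 'create'
    # mode yields 'finished/students.create.finished.csv' and a
    # 'students.create.pending.csv' in the storage root.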

    def _logfiles(self, basename):
        """Get sorted logfiles starting with `basename`.
        """
        def numerical_suffix(name):
            # return numerical suffix in `name` as number or 0.
            suffix = name.rsplit('.', 1)[-1]
            try:
                return int(suffix)
            except ValueError:
                return 0
        files = [basename]
        for name in os.listdir(os.path.join(self.storage, 'logs')):
            if RE_LOGFILE_BACKUP_NAME.match(name):
                if name.rsplit('.', 1)[0] == basename:
                    files.append(name)
        return sorted(files, key=numerical_suffix, reverse=True)
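    # Example: _logfiles('datacenter.log') might return
    #   ['datacenter.log.2', 'datacenter.log.1', 'datacenter.log'],
    # i.e. oldest backup first, current logfile last.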

    def queryLogfiles(self, basename, query=None, limit=0, start=0):
        """Search `query` in all logfiles starting with `basename`.

        Returns an iterator of those lines in logfiles starting with
        `basename` that match `query`. If you want the result as a
        list, simply list() the iterator.

        All logfiles with name `basename` and maybe some numerical
        extension ('.1', '.2', ...) are searched for the `query` term
        in correct chronological order. So, if you ask for a basename
        'app.log', then the files 'app.log.2', 'app.log.1', 'app.log',
        etc. will be searched in that order.

        The `query` is expected to be a string containing a regular
        expression.

        If `limit` is set to some numerical value, at most this number
        of lines is returned.

        With `start` you can give the number of the first matched line
        to return. `start` is zero-based, i.e. the first match has
        number 0, the second one 1, etc.

        Together with `limit` this allows some basic batching. Please
        keep in mind that batching might give unpredictable results
        when logfiles change between two requests. This is not a
        problem when only one file is searched and changes include
        only appending new log messages.

        Matches are found per line only (no multiline matches).

        This method raises ValueError if some basic condition is not
        met, for instance if the given query string is not a valid
        regular expression.

        Please note that, as this method is a generator, the exception
        is not raised before you really fetch a result line.
        """
        try:
            re_query = re.compile(query)
        except (re.error, TypeError):
            raise ValueError('Invalid query string: %s' % query)

        # avoid simple path traversal in `basename`
        basename = basename.replace('/../', '')
        files = self._logfiles(basename)

        # Search the log files
        num = 0
        for name in files:
            path = os.path.join(self.storage, 'logs', name)
            if not os.path.isfile(path):
                continue
            for line in open(path, 'rb'):
                if not re_query.search(line):
                    continue
                num += 1
                if (num - 1) < start:
                    continue
                yield line

                if limit and (num - limit >= start):
                    return
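    # Usage sketch: lazily fetch the first ten INFO lines from
    # datacenter.log and its rotated backups:
    #   lines = list(datacenter.queryLogfiles(
    #       'datacenter.log', query='INFO', limit=10))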

class DataCenterFile(object):
    """A description of a file stored in the data center.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.
        """
        bytesize = os.path.getsize(self.context)
        size = "%s bytes" % bytesize
        units = ['kb', 'MB', 'GB']
        for power, unit in reversed(list(enumerate(units))):
            power += 1
            if bytesize >= 1024 ** power:
                size = "%.2f %s" % (bytesize / (1024.0 ** power), unit)
                break
        return size
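    # Example: 2048 bytes -> '2.00 kb'; 5 * 1024 ** 2 bytes -> '5.00 MB';
    # sizes below 1 kb stay in plain bytes, e.g. '512 bytes'.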

    def getLinesNumber(self):
        """Get number of lines.
        """
        num = 0
        for line in open(self.context, 'rb'):
            num += 1
        return num

class LogFile(DataCenterFile):
    """A description of a log file.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in the first `maxline` lines of a file.
        """
        if self._parsed:
            return
        for num, line in enumerate(open(self.context, 'rb')):
            # markers are expected near the top of a logfile, so we
            # scan at most `maxline` lines
            if num >= maxline:
                break
            line = line.strip()
            if ':' not in line:
                continue
            name, text = line.split(':', 1)
            self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        marker = marker.lower()
        if not self._parsed:
            self._parseFile()
        if marker in self._markers:
            return self._markers[marker]
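    # Marker lines are expected to look like 'User: admin' or
    # 'Mode: create' near the top of a logfile; _getMarker('user')
    # would then return ' admin' (the text right of the first colon).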

    def getUserId(self):
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        return self._getMarker('source') or None


class DataCenterStorageMovedEvent(ObjectEvent):
    """An event fired when the datacenter storage moves.
    """
    grok.implements(IDataCenterStorageMovedEvent)