source: main/waeup.kofa/trunk/src/waeup/kofa/datacenter.py @ 8372

Last change on this file since 8372 was 8372, checked in by uli, 12 years ago

Add deleted dir in standard filecenter paths.

  • Property svn:keywords set to Id
File size: 9.5 KB
Line 
1## $Id: datacenter.py 8372 2012-05-06 20:48:22Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Kofa data center.
19
20The waeup data center manages uploaded data files and provides
21tools for importing/exporting CSV data.
22"""
23import logging
24import os
25import shutil
26import grok
27from datetime import datetime
28from zope.component import getUtility
29from zope.component.interfaces import ObjectEvent
30from waeup.kofa.interfaces import (IDataCenter, IDataCenterFile,
31                                   IDataCenterStorageMovedEvent,
32                                   IDataCenterConfig)
33from waeup.kofa.utils.helpers import copy_filesystem_tree
34from waeup.kofa.utils.logger import Logger
35
class DataCenter(grok.Container, Logger):
    """A data center contains CSV files.

    Files live in the directory ``self.storage``. Below that directory
    the standard subdirectories ``finished``, ``unfinished``, ``logs``
    and ``deleted`` are maintained.
    """
    grok.implements(IDataCenter)

    logger_name = 'waeup.kofa.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        super(DataCenter, self).__init__(*args, **kw)
        # The initial storage location is taken from the registered
        # IDataCenterConfig utility.
        self.storage = getUtility(IDataCenterConfig)['path']
        self._createSubDirs()

    def _createSubDirs(self):
        """Create standard subdirs.

        Subdirectories that already exist are left untouched.
        """
        for name in ['finished', 'unfinished', 'logs', 'deleted']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)
        return

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files directly inside the storage directory are
        returned (as `DataCenterFile` objects); subdirectories are
        skipped.

        By default files are sorted by basename. With ``sort='date'``
        they are sorted by timestamp instead, newest first.

        An empty list is returned if the storage directory does not
        exist.
        """
        result = []
        if not os.path.exists(self.storage):
            return result
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The ``logs`` subdirectory is created if it does not exist
        yet. Returns a list of `LogFile` objects.
        """
        result = []
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        for name in sorted(os.listdir(logdir)):
            fullpath = os.path.join(logdir, name)
            if not os.path.isfile(fullpath):
                continue
            result.append(LogFile(fullpath))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        ``path`` must be an existing path, otherwise a `ValueError`
        is raised. It is stored as absolute path.

        If ``move`` is ``True`` the contents of the old storage are
        copied over with `copy_filesystem_tree` before switching;
        ``overwrite`` is passed through to that helper.

        After switching, the standard subdirs are created in the new
        location and a `DataCenterStorageMovedEvent` is fired.

        Returns whatever `copy_filesystem_tree` returned, or an empty
        list if nothing was moved.
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                            overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing ``source`` is logged as a warning and otherwise
        ignored. Nothing happens if ``source`` and ``dest`` are equal.
        """
        if not os.path.exists(source):
            self.logger.warn('No such source path: %s' % source)
            return
        if source == dest:
            return
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def _isInsideStorage(self, path):
        """Tell whether ``path`` is the storage dir or located below it.

        Both paths are made absolute and compared by whole path
        components. A plain string-prefix comparison would treat a
        sibling directory like ``<storage>_tmp`` as part of the
        storage and would get confused by trailing path separators.
        """
        storage = os.path.abspath(self.storage)
        path = os.path.abspath(path)
        if path == storage:
            return True
        # ``os.path.join(storage, '')`` appends exactly one trailing
        # separator (and none if storage is the filesystem root).
        return path.startswith(os.path.join(storage, ''))

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .finished and .pending file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the stored .finished and
        .pending files. ``move_orig`` is currently not evaluated by
        this method.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular processors should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            # The source might be the .pending file of an earlier run.
            # That is only assumed, if the respective original file
            # (``<name>.csv``) exists in the unfinished dir.
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        finished_dest = os.path.join(finished_dir, basename)
        unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, finished_dest)
        elif successful and pending:
            # The original file waiting in unfinished/ is done now and
            # the processed .pending source is not needed any more.
            self._moveFile(unfinished_dest, finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, unfinished_dest)

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it.
        maybe_temp_dir = os.path.dirname(finished_file)
        if not self._isInsideStorage(maybe_temp_dir):
            shutil.rmtree(maybe_temp_dir)
        return
182
183
class DataCenterFile(object):
    """A description of a file stored in data center.

    ``context`` is the filesystem path of the described file. Name,
    size, date and number of lines are determined once, when the
    description is created.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is based on ``os.path.getctime`` of the file and
        formatted like ``YYYY-MM-DD HH:MM:SS``.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.

        This is the plain result of ``os.path.getctime``.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes below 1024 bytes are given as ``<n> bytes``, bigger
        ones with two decimal places in the largest fitting unit of
        ``kb``, ``MB`` or ``GB`` (powers of 1024).
        """
        bytesize = os.path.getsize(self.context)
        size = "%s bytes" % bytesize
        units = ['kb', 'MB', 'GB']
        # Try the biggest unit first, so that the first match wins.
        for power, unit in reversed(list(enumerate(units))):
            power += 1
            if bytesize >= 1024 ** power:
                size = "%.2f %s" % (bytesize/(1024.0**power), unit)
                break
        return size

    def getLinesNumber(self):
        """Get number of lines.

        The file is closed again after counting.
        """
        num = 0
        # Use a context manager so the file handle is released right
        # away and not only when garbage collection kicks in.
        with open(self.context, 'rb') as fd:
            for line in fd:
                num += 1
        return num
227
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the plain file data, markers of the form
    ``<name>:<text>`` are read from the file. The markers ``user``,
    ``mode``, ``processed`` and ``source`` are provided as attributes
    ``userid``, ``mode``, ``stats`` and ``source``.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Every line containing a colon is split at the first colon.
        The lower-cased part before it becomes the marker name, the
        rest the marker text. If a name appears several times, the
        last appearance wins.

        The file is parsed only once. ``maxline`` is accepted for
        backward compatibility but not evaluated: always the whole
        file is scanned.
        """
        if self._parsed:
            return
        # Use a context manager so the file handle is released right
        # away and not only when garbage collection kicks in.
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        """Get the text stored for `marker` (case-insensitive).

        Parses the file on first use. Returns ``None`` if no such
        marker was found.
        """
        marker = marker.lower()
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker)

    def getUserId(self):
        """Get text of ``user`` marker or ``'<UNKNOWN>'``.
        """
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get text of ``mode`` marker or ``'<NOT SET>'``.
        """
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get text of ``processed`` marker or ``'<Info not avail.>'``.
        """
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get text of ``source`` marker or ``None``.
        """
        return self._getMarker('source') or None
272
273
class DataCenterStorageMovedEvent(ObjectEvent):
    """Event notified after a data center switched its storage path.

    The event object is the data center whose storage changed.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.