source: main/waeup.kofa/trunk/src/waeup/kofa/datacenter.py @ 8426

Last change on this file since 8426 was 8397, checked in by uli, 12 years ago

Pyflakes.

  • Property svn:keywords set to Id
File size: 9.7 KB
Line 
1## $Id: datacenter.py 8397 2012-05-09 13:40:26Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Kofa data center.
19
20The waeup data center cares for management of upload data and provides
21tools for importing/exporting CSV data.
22"""
23import os
24import shutil
25import grok
26from datetime import datetime
27from zope.component import getUtility
28from zope.component.interfaces import ObjectEvent
29from waeup.kofa.interfaces import (IDataCenter, IDataCenterFile,
30                                   IDataCenterStorageMovedEvent,
31                                   IDataCenterConfig)
32from waeup.kofa.utils.helpers import copy_filesystem_tree
33from waeup.kofa.utils.logger import Logger
34
class DataCenter(grok.Container, Logger):
    """A data center contains CSV files.

    The data center manages a storage directory on disk (``storage``).
    Uploaded files live directly in that directory; processed files
    and logs are kept in the standard subdirectories ``finished``,
    ``unfinished``, ``logs`` and ``deleted``.
    """
    grok.implements(IDataCenter)

    logger_name = 'waeup.kofa.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        super(DataCenter, self).__init__(*args, **kw)
        # The initial storage path is taken from the registered
        # IDataCenterConfig utility.
        self.storage = getUtility(IDataCenterConfig)['path']
        self._createSubDirs()

    def _createSubDirs(self):
        """Create standard subdirs.

        Subdirs that already exist are left untouched.
        """
        for name in ['finished', 'unfinished', 'logs', 'deleted']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)
        return

    def _isInStorage(self, path):
        """Tell whether `path` is the storage dir or lies below it.

        Both paths are made absolute and compared by path components,
        so that a sibling directory that merely shares a name prefix
        with the storage dir (``/data/store2`` vs. ``/data/store``) is
        not mistaken for a subdirectory.
        """
        storage = os.path.abspath(self.storage)
        path = os.path.abspath(path)
        if path == storage:
            return True
        return path.startswith(storage.rstrip(os.sep) + os.sep)

    @property
    def deleted_path(self):
        """Get the path for deleted object data.
        """
        return os.path.join(self.storage, 'deleted')

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files located directly in the storage dir are
        returned (as `DataCenterFile` objects); subdirectories are
        skipped.

        Files are sorted by basename. If `sort` is ``'date'``, they
        are sorted by timestamp instead, newest first.

        If the storage dir does not exist, an empty list is returned.
        """
        result = []
        if not os.path.exists(self.storage):
            return result
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The logs/ subdir is created if it does not exist yet. Returns
        a list of `LogFile` objects.
        """
        result = []
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        for name in sorted(os.listdir(logdir)):
            if not os.path.isfile(os.path.join(logdir, name)):
                continue
            result.append(
                LogFile(os.path.join(self.storage, 'logs', name)))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        `path` must be an existing directory, otherwise a `ValueError`
        is raised.

        If `move` is ``True``, the contents of the old storage are
        copied over with `copy_filesystem_tree`; `overwrite` is passed
        on to that helper.

        After switching, the standard subdirs are created in the new
        location and a `DataCenterStorageMovedEvent` is fired.

        Returns whatever `copy_filesystem_tree` returned (named
        ``not_copied`` here), or an empty list if nothing was moved.
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                            overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing `source` is logged as a warning and otherwise
        ignored. If `source` and `dest` are the same path, nothing
        happens.
        """
        if not os.path.exists(source):
            self.logger.warn('No such source path: %s' % source)
            return
        if source == dest:
            return
        # Copy content and stat info first, remove the original last.
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .pending and .finished file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the generated files
        (``<base>.<mode>.finished<ext>``, ``<base>.<mode>.pending<ext>``).

        ``move_orig`` is accepted for compatibility but currently not
        evaluated by this method.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular processors should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        # A source named ``*.pending.csv`` is treated as a re-run of a
        # former import only if the matching original file still waits
        # in the unfinished/ dir.
        if basename.endswith('.pending.csv'):
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        finished_dest = os.path.join(finished_dir, basename)
        unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, finished_dest)
        elif successful and pending:
            # The original file moves from unfinished/ to finished/;
            # the processed .pending source is not needed any more.
            self._moveFile(unfinished_dest, finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, unfinished_dest)

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it.
        #
        # The check is done on absolute paths and per path component:
        # a plain string prefix comparison would regard a sibling dir
        # like ``<storage>2`` as part of the storage and, with a
        # relative storage path, could regard the storage itself as
        # 'outside'. An empty dirname (finished file given as bare
        # filename) denotes no dedicated directory, so nothing is
        # removed in that case.
        maybe_temp_dir = os.path.dirname(finished_file)
        if maybe_temp_dir and not self._isInStorage(maybe_temp_dir):
            shutil.rmtree(maybe_temp_dir)
        return
187
188
class DataCenterFile(object):
    """A description of a file stored in data center.

    `context` is the filesystem path of the described file. Name,
    size, date and number of lines are determined once, when the
    description is created.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is based on the file's ctime as reported by
        `os.path.getctime` and formatted as ``YYYY-MM-DD HH:MM:SS``.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.

        This is the raw ctime of the file.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes below 1024 bytes are given as ``<n> bytes``, larger
        ones with two decimals in the largest fitting unit of kb, MB
        or GB (powers of 1024).
        """
        bytesize = os.path.getsize(self.context)
        size = "%s bytes" % bytesize
        units = ['kb', 'MB', 'GB']
        # Try the largest unit first and stop at the first that fits.
        for power, unit in reversed(list(enumerate(units))):
            power += 1
            if bytesize >= 1024 ** power:
                size = "%.2f %s" % (bytesize/(1024.0**power), unit)
                break
        return size

    def getLinesNumber(self):
        """Get number of lines.

        The file is read in binary mode and closed again when
        counting is done.
        """
        num = 0
        # Use a context manager so the file handle is released
        # deterministically instead of waiting for garbage collection.
        with open(self.context, 'rb') as fd:
            for line in fd:
                num += 1
        return num
232
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the plain file data, a log file provides the
    values of some markers found in it: lines of the form
    ``<name>:<text>``. The markers ``user``, ``mode``, ``processed``
    and ``source`` are exposed as attributes ``userid``, ``mode``,
    ``stats`` and ``source``.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Each line containing a colon is split at the first colon;
        the part before it (lowercased) becomes the marker name, the
        rest the marker text. If a name occurs several times, the
        last occurrence wins.

        The file is parsed only once; subsequent calls return
        immediately.

        `maxline` is accepted for compatibility but currently not
        evaluated: all lines of the file are scanned.
        """
        if self._parsed:
            return
        # The context manager makes sure the file handle is closed
        # even if parsing fails halfway.
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        """Get the text stored for `marker` (case-insensitive).

        Triggers parsing of the file if that did not happen
        before. Returns ``None`` if no such marker was found.
        """
        marker = marker.lower()
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker)

    def getUserId(self):
        """Get value of ``user`` marker or ``'<UNKNOWN>'``.
        """
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get value of ``mode`` marker or ``'<NOT SET>'``.
        """
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get value of ``processed`` marker or ``'<Info not avail.>'``.
        """
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get value of ``source`` marker or ``None``.

        An empty marker text is reported as ``None`` as well.
        """
        return self._getMarker('source') or None
277
278
class DataCenterStorageMovedEvent(ObjectEvent):
    """An event fired when the storage of a data center moves.

    `DataCenter.setStoragePath` notifies this event, passing the data
    center itself as the event object, after the new storage path has
    been set.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.