source: main/waeup.kofa/trunk/src/waeup/kofa/datacenter.py @ 8403

Last change on this file since 8403 was 8397, checked in by uli, 13 years ago

Pyflakes.

  • Property svn:keywords set to Id
File size: 9.7 KB
RevLine 
[7193]1## $Id: datacenter.py 8397 2012-05-09 13:40:26Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
[7819]18"""Kofa data center.
[4146]19
20The waeup data center takes care of managing uploaded data and provides
21tools for importing/exporting CSV data.
22"""
23import os
[4896]24import shutil
[4146]25import grok
26from datetime import datetime
[7568]27from zope.component import getUtility
[4883]28from zope.component.interfaces import ObjectEvent
[7811]29from waeup.kofa.interfaces import (IDataCenter, IDataCenterFile,
[7568]30                                   IDataCenterStorageMovedEvent,
[7579]31                                   IDataCenterConfig)
[7811]32from waeup.kofa.utils.helpers import copy_filesystem_tree
33from waeup.kofa.utils.logger import Logger
[4146]34
class DataCenter(grok.Container, Logger):
    """A data center contains CSV files.

    The files live in the directory ``self.storage``. Below that
    directory a fixed set of subdirectories (``finished``,
    ``unfinished``, ``logs`` and ``deleted``) is maintained.
    """
    grok.implements(IDataCenter)

    logger_name = 'waeup.kofa.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        super(DataCenter, self).__init__(*args, **kw)
        # The initial storage location is taken from the registered
        # IDataCenterConfig utility.
        self.storage = getUtility(IDataCenterConfig)['path']
        self._createSubDirs()

    def _createSubDirs(self):
        """Create standard subdirs.

        Subdirectories that already exist are left untouched.
        """
        for name in ['finished', 'unfinished', 'logs', 'deleted']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)
        return

    def _isInStorage(self, path):
        """Tell whether `path` is the storage dir or located below it.

        The comparison is done on whole path components. A plain
        string-prefix test (as performed by ``os.path.commonprefix``)
        would wrongly treat ``<storage>_tmp`` as part of ``<storage>``.
        """
        storage = os.path.normpath(self.storage)
        path = os.path.normpath(path)
        if path == storage:
            return True
        # rstrip() keeps this working if storage is a filesystem root.
        return path.startswith(storage.rstrip(os.sep) + os.sep)

    @property
    def deleted_path(self):
        """Get the path for deleted object data.
        """
        return os.path.join(self.storage, 'deleted')

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files located directly in `storage` are
        considered; each one is wrapped in a `DataCenterFile`.

        Files are sorted by basename, unless `sort` is ``'date'``. In
        that case they are sorted by timestamp, newest first.

        An empty list is returned if `storage` does not exist.
        """
        result = []
        if not os.path.exists(self.storage):
            return result
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The logs/ subdir is created if it does not exist yet. Each
        regular file found is wrapped in a `LogFile`.
        """
        result = []
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        for name in sorted(os.listdir(logdir)):
            fullpath = os.path.join(logdir, name)
            if not os.path.isfile(fullpath):
                continue
            result.append(LogFile(fullpath))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        `path` must denote an existing location, otherwise a
        `ValueError` is raised.

        If `move` is ``True``, the contents of the old storage are
        copied over with `copy_filesystem_tree`, passing on
        `overwrite`. The value returned by that call is returned; if
        nothing was moved, an empty list is returned.

        Afterwards the standard subdirs are created in the new
        location and a `DataCenterStorageMovedEvent` is fired.
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                              overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing `source` is logged as a warning and otherwise
        ignored. Nothing happens if `source` and `dest` are equal.
        """
        if not os.path.exists(source):
            self.logger.warn('No such source path: %s' % source)
            return
        if source == dest:
            return
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .finished and .pending file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the stored .finished and
        .pending files.

        ``move_orig`` is accepted but not evaluated by this method.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular processors should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            # The source might be the pending remainder of an earlier
            # run. It is treated as such only if the file it was
            # derived from still waits in the unfinished/ dir.
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        source_finished_dest = os.path.join(finished_dir, basename)
        source_unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, source_finished_dest)
        elif successful and pending:
            self._moveFile(source_unfinished_dest, source_finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, source_unfinished_dest)

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it.
        maybe_temp_dir = os.path.dirname(finished_file)
        if not self._isInStorage(maybe_temp_dir):
            shutil.rmtree(maybe_temp_dir)
        return
187
[6286]188
class DataCenterFile(object):
    """A description of a file stored in data center.

    `context` is the path of the described file. Name, size, date and
    number of lines are determined once, when the description is
    created.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is based on ``os.path.getctime`` of the file and
        formatted like ``YYYY-MM-DD HH:MM:SS``.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.

        This is the plain result of ``os.path.getctime``.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes below 1024 bytes are given in bytes. Larger sizes are
        given with two decimal places in the largest fitting unit.
        """
        bytesize = os.path.getsize(self.context)
        size = "%s bytes" % bytesize
        units = ['kb', 'MB', 'GB']
        # Try the largest unit first and stop at the first one that fits.
        for power, unit in reversed(list(enumerate(units))):
            power += 1
            if bytesize >= 1024 ** power:
                size = "%.2f %s" % (bytesize/(1024.0**power), unit)
                break
        return size

    def getLinesNumber(self):
        """Get number of lines.
        """
        # The context manager makes sure the file handle is closed
        # again, also if reading fails.
        with open(self.context, 'rb') as fd:
            return sum(1 for line in fd)
[6286]232
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the data of a regular data center file, some
    markers are read from the log file contents. Markers are lines of
    the form ``<name>:<text>``.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Each line containing a colon is split at the first colon. The
        part before it, lower-cased, becomes the marker name, the rest
        the marker text. If a name occurs several times, the last
        occurrence wins.

        The file is read only once; subsequent calls return
        immediately.

        `maxline` is currently not evaluated: all lines are parsed.
        """
        if self._parsed:
            return
        # The context manager makes sure the file handle is closed
        # again, also if parsing fails.
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        """Get the text stored for `marker` or ``None``.

        Marker names are looked up case-insensitively. The file is
        parsed on first use.
        """
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker.lower())

    def getUserId(self):
        """Get text of the ``user`` marker or ``'<UNKNOWN>'``.
        """
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get text of the ``mode`` marker or ``'<NOT SET>'``.
        """
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get text of the ``processed`` marker or a placeholder.
        """
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get text of the ``source`` marker or ``None``.
        """
        return self._getMarker('source') or None
[4883]277
[4961]278
class DataCenterStorageMovedEvent(ObjectEvent):
    """Event signalling that the storage of a datacenter was relocated.

    The event carries no data of its own beyond what `ObjectEvent`
    provides.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.