source: main/waeup.sirp/trunk/src/waeup/sirp/datacenter.py @ 7204

Last change on this file since 7204 was 7193, checked in by Henrik Bettermann, 13 years ago

More copyright adjustments.

  • Property svn:keywords set to Id
File size: 9.4 KB
RevLine 
[7193]1## $Id: datacenter.py 7193 2011-11-25 07:21:29Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
"""WAeUP data center.

The WAeUP data center manages uploaded data files and provides
tools for importing and exporting CSV data.
"""
[4873]23import logging
[4146]24import os
[4896]25import shutil
[4146]26import grok
27from datetime import datetime
[4883]28from zope.component.interfaces import ObjectEvent
[4961]29from waeup.sirp.interfaces import (IDataCenter, IDataCenterFile,
30                                   IDataCenterStorageMovedEvent)
[7186]31from waeup.sirp.utils.helpers import copy_filesystem_tree
[6578]32from waeup.sirp.utils.logger import Logger
[4146]33
class DataCenter(grok.Container, Logger):
    """A data center contains CSV files.

    Files are kept in the directory named by ``storage``. Below that
    directory three standard subdirectories are maintained:
    ``finished``, ``unfinished`` and ``logs``.
    """
    grok.implements(IDataCenter)

    # Default storage location: a ``files`` directory next to this module.
    storage = os.path.join(os.path.dirname(__file__), 'files')

    # Consumed by the ``Logger`` mixin (defined elsewhere).
    # NOTE(review): presumably ``${sitename}`` is substituted by the
    # mixin -- confirm against waeup.sirp.utils.logger.
    logger_name = 'waeup.sirp.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        """Initialize the container and create the standard subdirs."""
        super(DataCenter, self).__init__(*args, **kw)
        self._createSubDirs()

    def _createSubDirs(self):
        """Create standard subdirs.

        Subdirectories that already exist are left untouched. Only the
        subdirectories are created (``os.mkdir``), so the storage
        directory itself must already exist.
        """
        for name in ['finished', 'unfinished', 'logs']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files located directly in `storage` are
        returned, each wrapped in a `DataCenterFile`.

        Files are sorted by basename. If `sort` is ``'date'`` they are
        sorted by timestamp instead, newest first. An empty list is
        returned if the storage directory does not exist.
        """
        result = []
        if not os.path.exists(self.storage):
            return result
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The ``logs`` subdirectory is created if it is missing. Each
        regular file found is wrapped in a `LogFile`.
        """
        result = []
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        for name in sorted(os.listdir(logdir)):
            path = os.path.join(logdir, name)
            if not os.path.isfile(path):
                continue
            result.append(LogFile(path))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        `path` is made absolute and must exist, otherwise a
        `ValueError` is raised.

        If `move` is ``True`` the old storage tree is copied to the new
        location with `copy_filesystem_tree`; `overwrite` is passed on
        to that helper.

        Afterwards the standard subdirs are created in the new storage
        and a `DataCenterStorageMovedEvent` is sent.

        Returns whatever `copy_filesystem_tree` returned (an empty
        list if nothing was moved).
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        # Deliberately strict: only the literal ``True`` triggers a move.
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                              overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing `source` is logged as a warning and otherwise
        ignored. Nothing happens if `source` and `dest` are equal.
        """
        if not os.path.exists(source):
            # ``Logger.warn`` is a deprecated alias of ``warning``.
            self.logger.warning('No such source path: %s' % source)
            return
        if source == dest:
            return
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def _isInStorage(self, path):
        """Tell whether `path` is the storage dir or located below it.

        Both paths are made absolute and compared by complete path
        components. ``os.path.commonprefix`` is not suitable here: it
        compares character-wise, so that ``/data/store_tmp`` would be
        considered part of ``/data/store``.
        """
        storage = os.path.normcase(os.path.abspath(self.storage))
        path = os.path.normcase(os.path.abspath(path))
        if path == storage:
            return True
        return path.startswith(storage.rstrip(os.sep) + os.sep)

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .finished and .pending file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the generated files
        (``<base>.<mode>.finished<ext>``, ``<base>.<mode>.pending<ext>``).

        ``move_orig`` is accepted but not evaluated by this method.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular importers should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            # The source might be a pending file of an earlier run. It
            # is treated as such only if the file it was derived from
            # still waits in the ``unfinished`` dir.
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        report_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, report_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        finished_dest = os.path.join(finished_dir, basename)
        unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, finished_dest)
        elif successful and pending:
            # The original upload is done now; the pending file that
            # was processed is not needed any more.
            self._moveFile(unfinished_dest, finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, unfinished_dest)
        # Unsuccessful and pending: the original stays in ``unfinished``.

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it.
        maybe_temp_dir = os.path.dirname(finished_file)
        if not self._isInStorage(maybe_temp_dir):
            # The path is passed on unmodified, so that an empty
            # dirname still fails instead of removing the current
            # working directory.
            shutil.rmtree(maybe_temp_dir)
        return
181
[6286]182
class DataCenterFile(object):
    """A description of a file stored in data center.

    `context` is the path of the described file. Name, size, date and
    number of lines are determined once, when the description is
    created.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is derived from the ctime of the file and formatted
        as ``YYYY-MM-DD HH:MM:SS``.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.

        This is the ctime of the file as returned by
        ``os.path.getctime``.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes below 1024 bytes are given as ``<n> bytes``, larger ones
        with two decimals in the largest fitting unit of kb, MB or GB.
        """
        bytesize = os.path.getsize(self.context)
        size = "%s bytes" % bytesize
        units = ['kb', 'MB', 'GB']
        # Try the largest unit first; units[i] stands for 1024**(i+1).
        for power, unit in reversed(list(enumerate(units))):
            power += 1
            if bytesize >= 1024 ** power:
                size = "%.2f %s" % (bytesize/(1024.0**power), unit)
                break
        return size

    def getLinesNumber(self):
        """Get number of lines.

        The file is closed again after counting, also if reading
        fails.
        """
        with open(self.context, 'rb') as fd:
            return sum(1 for line in fd)
[6286]226
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the data of a regular `DataCenterFile`, markers of
    the form ``<name>:<text>`` are read from the file and offered as
    `userid`, `mode`, `stats` and `source`.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Every line containing a colon is split at the first colon; the
        lower-cased part before it becomes the marker name, the rest
        (not stripped any further) the marker text. If a name appears
        several times, the last occurrence wins.

        `maxline` is kept for backward compatibility only. It is not
        evaluated: always the complete file is scanned.

        The file is parsed only once per instance and closed after
        reading.
        """
        if self._parsed:
            return
        # NOTE(review): lines are read in binary mode and tested
        # against a native string, which works with Python 2 only.
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True

    def _getMarker(self, marker):
        """Get the text of `marker` or ``None`` if there is none.

        Marker names are looked up case-insensitively. The file is
        parsed on first use.
        """
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker.lower())

    def getUserId(self):
        """Get text of the ``user`` marker or ``'<UNKNOWN>'``."""
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get text of the ``mode`` marker or ``'<NOT SET>'``."""
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get text of the ``processed`` marker or a placeholder."""
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get text of the ``source`` marker or ``None``."""
        return self._getMarker('source') or None
[4883]271
[4961]272
class DataCenterStorageMovedEvent(ObjectEvent):
    """Object event announcing that the storage of a data center moved.

    The event carries the respective data center as its object.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.