source: main/waeup.sirp/trunk/src/waeup/sirp/datacenter.py @ 7580

Last change on this file since 7580 was 7579, checked in by Henrik Bettermann, 13 years ago

Let's use the new IDataCenterConfig utility instead of ISIRPUtils to retrieve the data center storage path.

  • Property svn:keywords set to Id
File size: 9.6 KB
Line 
1## $Id: datacenter.py 7579 2012-02-03 09:13:46Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""SIRP data center.
19
20The waeup data center cares for management of upload data and provides
21tools for importing/exporting CSV data.
22"""
23import logging
24import os
25import shutil
26import grok
27from datetime import datetime
28from zope.component import getUtility
29from zope.component.interfaces import ObjectEvent
30from waeup.sirp.interfaces import (IDataCenter, IDataCenterFile,
31                                   IDataCenterStorageMovedEvent,
32                                   #ISIRPUtils,
33                                   IDataCenterConfig)
34from waeup.sirp.utils.helpers import copy_filesystem_tree
35from waeup.sirp.utils.logger import Logger
36
class DataCenter(grok.Container, Logger):
    """A data center contains CSV files.

    The files live in a directory on the filesystem (``storage``)
    which also holds the standard subdirectories ``finished``,
    ``unfinished`` and ``logs``.
    """
    grok.implements(IDataCenter)

    logger_name = 'waeup.sirp.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        """Look up the storage path and create the standard subdirs.

        The storage path is read from the ``path`` entry of the
        registered ``IDataCenterConfig`` utility.
        """
        super(DataCenter, self).__init__(*args, **kw)
        self.storage = getUtility(IDataCenterConfig)['path']
        self._createSubDirs()

    def _createSubDirs(self):
        """Create standard subdirs.

        Existing subdirs are left untouched. The storage directory
        itself is expected to exist; ``os.mkdir`` raises otherwise.
        """
        for name in ['finished', 'unfinished', 'logs']:
            path = os.path.join(self.storage, name)
            if not os.path.exists(path):
                os.mkdir(path)
        return

    def _isInsideStorage(self, path):
        """Tell whether `path` is the storage dir or located below it.

        Both paths are made absolute and normalized and then compared
        by complete path components. A plain string-prefix test is
        not sufficient: it would regard ``/data/storage2`` as part of
        ``/data/storage``.
        """
        storage = os.path.abspath(self.storage)
        path = os.path.abspath(path)
        if path == storage:
            return True
        return path.startswith(storage.rstrip(os.sep) + os.sep)

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files located directly in `storage` are
        returned, each wrapped in a `DataCenterFile`.

        Files are sorted by basename by default. With ``sort`` set
        to ``'date'`` they are sorted by timestamp, newest first.

        If the storage directory does not exist, an empty list is
        returned.
        """
        if not os.path.exists(self.storage):
            return []
        result = []
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if os.path.isfile(fullpath):
                result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result.sort(key=lambda x: x.getTimeStamp(), reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The ``logs`` subdir is created if it does not exist yet.
        Each regular file found is wrapped in a `LogFile`.
        """
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        result = []
        for name in sorted(os.listdir(logdir)):
            fullpath = os.path.join(logdir, name)
            if os.path.isfile(fullpath):
                result.append(LogFile(fullpath))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        ``path`` is made absolute and must exist already, otherwise
        a `ValueError` is raised.

        If ``move`` is ``True``, the contents of the old storage are
        copied to the new location by `copy_filesystem_tree`;
        ``overwrite`` is passed on to that helper.

        Afterwards the standard subdirs are created in the new
        location and a `DataCenterStorageMovedEvent` is sent.

        Returns the result of `copy_filesystem_tree` (presumably the
        entries that could not be copied) or an empty list if
        nothing was moved.
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                              overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing source is logged as a warning and otherwise
        ignored. If source and dest are the same path, nothing is
        done.
        """
        if not os.path.exists(source):
            self.logger.warning('No such source path: %s', source)
            return
        if source == dest:
            return
        # Copy content first, then the stat info, and remove the
        # original only after both steps succeeded.
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .finished and .pending file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the generated files
        (``<base>.<mode>.finished<ext>`` and
        ``<base>.<mode>.pending<ext>``).

        ``move_orig`` is accepted for compatibility but currently
        not evaluated.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular importers should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            # The source might be the .pending file of an earlier
            # run. It is treated as such only if the respective
            # original file is waiting in the unfinished/ dir.
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        finished_dest = os.path.join(finished_dir, basename)
        unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, finished_dest)
        elif successful and pending:
            self._moveFile(unfinished_dest, finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, unfinished_dest)

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it. An empty dirname
        # (bare filename) is skipped: it denotes the current working
        # directory, which must never be removed.
        maybe_temp_dir = os.path.dirname(finished_file)
        if maybe_temp_dir and not self._isInsideStorage(maybe_temp_dir):
            shutil.rmtree(maybe_temp_dir)
        return
185
186
class DataCenterFile(object):
    """A description of a file stored in data center.

    ``context`` is the filesystem path of the described file. Name,
    size, date and number of lines are determined once on
    initialization.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is derived from the file's ctime and formatted as
        ``YYYY-MM-DD HH:MM:SS`` in local time.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.

        This is the ctime of the file as returned by
        ``os.path.getctime``.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes of 1024 bytes and more are given with two decimal
        places in the largest fitting unit (kb, MB or GB), smaller
        sizes in plain bytes.
        """
        bytesize = os.path.getsize(self.context)
        # Try the largest unit first, so the first match wins.
        for power, unit in ((3, 'GB'), (2, 'MB'), (1, 'kb')):
            threshold = 1024 ** power
            if bytesize >= threshold:
                return "%.2f %s" % (bytesize / float(threshold), unit)
        return "%s bytes" % bytesize

    def getLinesNumber(self):
        """Get number of lines.

        The file is read in binary mode and closed again afterwards.
        """
        with open(self.context, 'rb') as fd:
            return sum(1 for _ in fd)
230
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the data of a regular data center file, markers
    of the form ``<name>:<text>`` are read from the file to
    determine user id, mode, processing stats and source path.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Each line containing a colon is split at the first colon.
        The part before it, lowercased, is the marker name, the
        rest the marker text. If a name occurs several times, the
        last occurrence wins.

        The file is parsed only once and closed afterwards.

        ``maxline`` is accepted for compatibility but currently not
        evaluated: always the complete file is scanned.
        """
        if self._parsed:
            return
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        """Get the text stored for `marker` or ``None``.

        Marker names are looked up case-insensitively. The file is
        parsed on first use.
        """
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker.lower())

    def getUserId(self):
        """Get text of the ``user`` marker or ``'<UNKNOWN>'``."""
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get text of the ``mode`` marker or ``'<NOT SET>'``."""
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get text of the ``processed`` marker or a placeholder."""
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get text of the ``source`` marker or ``None``."""
        return self._getMarker('source') or None
275
276
class DataCenterStorageMovedEvent(ObjectEvent):
    """Event sent after the storage of a data center was relocated.

    The object of the event is the data center concerned.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.