source: main/waeup.sirp/branches/henrik-bootstrap/src/waeup/sirp/datacenter.py @ 8413

Last change on this file since 8413 was 7321, checked in by Henrik Bettermann, 13 years ago

Replace the term 'WAeUP' by SIRP which is a WAeUP product.

  • Property svn:keywords set to Id
File size: 9.4 KB
Line 
1## $Id: datacenter.py 7321 2011-12-10 06:15:17Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""SIRP data center.
19
20The waeup data center takes care of managing uploaded data and provides
21tools for importing/exporting CSV data.
22"""
23import logging
24import os
25import shutil
26import grok
27from datetime import datetime
28from zope.component.interfaces import ObjectEvent
29from waeup.sirp.interfaces import (IDataCenter, IDataCenterFile,
30                                   IDataCenterStorageMovedEvent)
31from waeup.sirp.utils.helpers import copy_filesystem_tree
32from waeup.sirp.utils.logger import Logger
33
class DataCenter(grok.Container, Logger):
    """A data center contains CSV files.

    Files are kept directly in the directory given by ``storage``. The
    subdirectories ``finished``, ``unfinished`` and ``logs`` are
    created below it on instantiation and whenever the storage path
    is changed.
    """
    grok.implements(IDataCenter)
    storage = os.path.join(os.path.dirname(__file__), 'files')

    logger_name = 'waeup.sirp.${sitename}.datacenter'
    logger_filename = 'datacenter.log'

    def __init__(self, *args, **kw):
        super(DataCenter, self).__init__(*args, **kw)
        self._createSubDirs()

    def _createSubDirs(self):
        """Create standard subdirs.

        Already existing subdirs are left untouched.
        """
        for name in ['finished', 'unfinished', 'logs']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)
        return

    def _isInsideStorage(self, path):
        """Tell whether `path` is the storage dir or located below it.

        The comparison is done on normalized absolute paths and
        respects path component boundaries, so that a sibling like
        ``/data/files2`` is not mistaken for a subdirectory of
        ``/data/files``.
        """
        storage = os.path.normcase(os.path.abspath(self.storage))
        path = os.path.normcase(os.path.abspath(path))
        if path == storage:
            return True
        # rstrip() copes with a storage located at the filesystem root.
        return path.startswith(storage.rstrip(os.sep) + os.sep)

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files located directly in `storage` are
        returned, each one wrapped in a `DataCenterFile`.

        By default files are sorted by basename. With ``sort='date'``
        they are sorted by timestamp, newest first.

        If `storage` does not exist, an empty list is returned.
        """
        result = []
        if not os.path.exists(self.storage):
            return result
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The logs/ subdir is created if it does not exist yet. Each
        regular file found is wrapped in a `LogFile`.
        """
        result = []
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        for name in sorted(os.listdir(logdir)):
            fullpath = os.path.join(logdir, name)
            if not os.path.isfile(fullpath):
                continue
            result.append(LogFile(fullpath))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        `path` must denote an existing location, otherwise a
        `ValueError` is raised.

        If `move` is ``True``, the contents of the old storage are
        handed to `copy_filesystem_tree` together with the
        `overwrite` flag before the storage is switched.

        Afterwards the standard subdirs are created in the new
        location and a `DataCenterStorageMovedEvent` is sent.

        Returns the result of `copy_filesystem_tree` (presumably the
        entries that could not be copied) or an empty list if nothing
        was moved.
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copy_filesystem_tree(self.storage, path,
                                              overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing source is logged as a warning and otherwise
        ignored. If source and dest are the same path, nothing
        happens.
        """
        if not os.path.exists(source):
            # NOTE(review): assumes ``self.logger`` is a standard
            # ``logging.Logger``; ``warn`` is a deprecated alias of
            # ``warning`` there.
            self.logger.warning('No such source path: %s', source)
            return
        if source == dest:
            return
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .finished and .pending file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the stored .finished
        and .pending files.

        ``move_orig`` is accepted but not evaluated by this method.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular importers should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            # The source might be a pending file of a former run. We
            # treat it as such only if the respective original file
            # is waiting in the unfinished/ subdir.
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        source_finished_dest = os.path.join(finished_dir, basename)
        source_unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, source_finished_dest)
        elif successful and pending:
            self._moveFile(source_unfinished_dest, source_finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, source_unfinished_dest)

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it. A plain
        # ``os.path.commonprefix`` is not sufficient here as it
        # compares character-wise and ignores path boundaries.
        maybe_temp_dir = os.path.dirname(finished_file)
        if not self._isInsideStorage(maybe_temp_dir):
            shutil.rmtree(maybe_temp_dir)
        return
181
182
class DataCenterFile(object):
    """A description of a file stored in data center.

    ``context`` is the filesystem path of the described file. Name,
    size, upload date and number of lines are determined once, when
    the description is created.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is derived from the ctime of the file and formatted
        as ``YYYY-MM-DD HH:MM:SS``.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime("%Y-%m-%d %H:%M:%S")

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.

        This is the ctime of the file as returned by
        `os.path.getctime`.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes below 1024 bytes are given in bytes, larger ones with
        two decimal places in the biggest fitting unit of ``kb``,
        ``MB`` or ``GB`` (powers of 1024).
        """
        bytesize = os.path.getsize(self.context)
        # Try the biggest unit first, so that the first match wins.
        for power, unit in ((3, 'GB'), (2, 'MB'), (1, 'kb')):
            threshold = 1024 ** power
            if bytesize >= threshold:
                return "%.2f %s" % (bytesize / float(threshold), unit)
        return "%s bytes" % bytesize

    def getLinesNumber(self):
        """Get number of lines.

        The file is read in binary mode and closed again right after
        counting.
        """
        with open(self.context, 'rb') as fd:
            return sum(1 for line in fd)
226
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the plain file data, the values of the markers
    ``user``, ``mode``, ``processed`` and ``source`` found in the
    file are provided as attributes ``userid``, ``mode``, ``stats``
    and ``source``.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Every line containing a colon is split at the first colon
        into a marker name (stored lowercased) and the remaining
        text. If a marker appears several times, the last occurrence
        wins.

        The file is parsed only once; further calls return
        immediately.

        ``maxline`` is accepted for compatibility but not evaluated:
        all lines of the file are scanned.
        """
        if self._parsed:
            return
        # NOTE(review): the file is read in binary mode and compared
        # against ``str`` literals, which relies on Python 2 string
        # semantics.
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        """Get the text stored for `marker` or ``None``.

        Marker names are looked up case-insensitively. The file is
        parsed on first use.
        """
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker.lower())

    def getUserId(self):
        """Get value of ``user`` marker or ``'<UNKNOWN>'``."""
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get value of ``mode`` marker or ``'<NOT SET>'``."""
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get value of ``processed`` marker or a placeholder text."""
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get value of ``source`` marker or ``None``."""
        return self._getMarker('source') or None
271
272
class DataCenterStorageMovedEvent(ObjectEvent):
    """Event signalling that the storage of a data center moved.

    The event object carries the data center whose storage path was
    changed.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.