source: main/waeup.sirp/branches/ulif-fasttables/src/waeup/sirp/datacenter.py @ 5269

Last change on this file since 5269 was 4996, checked in by uli, 15 years ago

Make filenames even longer.

File size: 10.0 KB
Line 
1"""WAeUP data center.
2
3The waeup data center cares for management of upload data and provides
4tools for importing/exporting CSV data.
5"""
6import logging
7import os
8import shutil
9import grok
10from datetime import datetime
11from zope.component.interfaces import ObjectEvent
12from waeup.sirp.interfaces import (IDataCenter, IDataCenterFile,
13                                   IDataCenterStorageMovedEvent)
14from waeup.sirp.utils.helpers import copyFileSystemTree
15
class DataCenter(grok.Container):
    """A data center contains CSV files.

    All files live below the directory named by `storage`. The
    subdirectories ``finished`` and ``unfinished`` receive processed
    source files, ``logs`` receives the datacenter log.
    """
    grok.implements(IDataCenter)

    # Default storage location: a ``files`` directory next to this module.
    storage = os.path.join(os.path.dirname(__file__), 'files')

    @property
    def logger(self):
        """Get a logger for datacenter actions.

        The logger name contains the name of the current site (or
        ``app`` if the site has no ``__name__``). A file handler is
        attached on first use.
        """
        # We need a different logger for every site...
        site = grok.getSite()
        sitename = getattr(site, '__name__', 'app')
        loggername = 'waeup.sirp.%s.datacenter' % sitename
        logger = logging.getLogger(loggername)
        if not logger.handlers:
            logger = self._setupLogger(logger)
        return logger

    def __init__(self, *args, **kw):
        super(DataCenter, self).__init__(*args, **kw)
        self._createSubDirs()

    def _setupLogger(self, logger):
        """Setup datacenter logger.

        Attaches a rotating file handler writing to
        ``<storage>/logs/datacenter.log`` to `logger` and returns the
        logger. The ``logs`` directory is created if missing.
        """
        # ``import logging`` alone does not guarantee that the
        # ``logging.handlers`` submodule is loaded, so import the
        # handler class explicitly.
        from logging.handlers import RotatingFileHandler

        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        filename = os.path.join(logdir, 'datacenter.log')

        # Create a rotating file handler logger for datacenter.
        # NOTE(review): 5*1024**1 is 5 KiB per log file -- confirm that
        # such a small limit is really intended.
        handler = RotatingFileHandler(
            filename, maxBytes=5*1024**1, backupCount=5)
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)

        # Here we decide, whether our messages will _also_ go to
        # application log.
        logger.propagate = False
        logger.setLevel(logging.DEBUG)
        logger.addHandler(handler)
        return logger

    def _createSubDirs(self):
        """Create standard subdirs.

        Creates ``finished`` and ``unfinished`` below `storage` unless
        they exist already.
        """
        for name in ['finished', 'unfinished']:
            path = os.path.join(self.storage, name)
            if os.path.exists(path):
                continue
            os.mkdir(path)
        return

    def getFiles(self, sort='name'):
        """Get a list of files stored in `storage`.

        Only regular files located directly in `storage` are
        returned, each wrapped in a `DataCenterFile`.

        Files are sorted by basename. If `sort` is ``'date'`` they are
        sorted by timestamp instead, newest first. An empty list is
        returned if `storage` does not exist.
        """
        if not os.path.exists(self.storage):
            return []
        result = []
        for filename in sorted(os.listdir(self.storage)):
            fullpath = os.path.join(self.storage, filename)
            if not os.path.isfile(fullpath):
                continue
            result.append(DataCenterFile(fullpath))
        if sort == 'date':
            # sort results in newest-first order...
            result = sorted(result, key=lambda x: x.getTimeStamp(),
                            reverse=True)
        return result

    def getLogFiles(self):
        """Get the files from logs/ subdir. Files are sorted by name.

        The ``logs`` directory is created if it does not exist
        yet. Each regular file is wrapped in a `LogFile`.
        """
        logdir = os.path.join(self.storage, 'logs')
        if not os.path.exists(logdir):
            os.mkdir(logdir)
        result = []
        for name in sorted(os.listdir(logdir)):
            fullpath = os.path.join(logdir, name)
            if not os.path.isfile(fullpath):
                continue
            result.append(LogFile(fullpath))
        return result

    def setStoragePath(self, path, move=False, overwrite=False):
        """Set the path where to store files.

        `path` must be an existing directory, otherwise a
        `ValueError` is raised.

        If `move` is ``True`` the contents of the old storage are
        copied to the new location using `copyFileSystemTree`;
        `overwrite` is passed on to that helper.

        Afterwards the standard subdirs are created in the new
        location, the logger is redirected to it and a
        `DataCenterStorageMovedEvent` is sent.

        Returns whatever `copyFileSystemTree` returned (an empty list
        if nothing was moved).
        """
        path = os.path.abspath(path)
        not_copied = []
        if not os.path.exists(path):
            raise ValueError('The path given does not exist: %s' % path)
        if move is True:
            not_copied = copyFileSystemTree(self.storage, path,
                                            overwrite=overwrite)
        self.storage = path
        self._createSubDirs()
        # Adjust logger...
        logger = self.logger
        # Iterate over a copy: removeHandler() modifies
        # ``logger.handlers`` and iterating the live list would skip
        # handlers. Close removed handlers to release the old logfile.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
            handler.close()
        self._setupLogger(logger)
        grok.notify(DataCenterStorageMovedEvent(self))
        return not_copied

    def _moveFile(self, source, dest):
        """Move file source to dest preserving ctime, mtime, etc.

        A missing `source` is logged as a warning and otherwise
        ignored. Nothing happens if `source` and `dest` are equal.
        """
        if not os.path.exists(source):
            self.logger.warning('No such source path: %s' % source)
            return
        if source == dest:
            return
        shutil.copyfile(source, dest)
        shutil.copystat(source, dest)
        os.unlink(source)

    def _isInStorage(self, path):
        """Tell whether `path` is the storage dir or located inside it.

        Paths are compared component-wise after normalization, so
        that a sibling like ``<storage>-tmp`` is not mistaken for a
        location inside ``<storage>``.
        """
        storage = os.path.abspath(self.storage)
        path = os.path.abspath(path)
        if path == storage:
            return True
        return path.startswith(storage.rstrip(os.sep) + os.sep)

    def distProcessedFiles(self, successful, source_path, finished_file,
                           pending_file, mode='create', move_orig=True):
        """Put processed files into final locations.

        ``successful`` is a boolean that tells, whether processing was
        successful.

        ``source_path``: path to file that was processed.

        ``finished_file``, ``pending_file``: paths to the respective
        generated .pending and .finished file. The .pending file path
        may be ``None``.

        ``mode`` becomes part of the names of the generated files.

        ``move_orig`` is accepted but not evaluated by this method.

        If finished file is placed in a location outside the local
        storage dir, the complete directory is removed
        afterwards. Regular importers should put their stuff in
        dedicated temporary dirs.

        See datacenter.txt for more info about how this works.
        """
        basename = os.path.basename(source_path)
        pending_name = basename
        pending = False
        finished_dir = os.path.join(self.storage, 'finished')
        unfinished_dir = os.path.join(self.storage, 'unfinished')

        if basename.endswith('.pending.csv'):
            # The source might be the pending file of an earlier run;
            # that is the case if the respective original file waits
            # in the ``unfinished`` dir.
            maybe_basename = "%s.csv" % basename.rsplit('.', 3)[0]
            maybe_src = os.path.join(unfinished_dir, maybe_basename)
            if os.path.isfile(maybe_src):
                basename = maybe_basename
                pending = True

        base, ext = os.path.splitext(basename)
        finished_name = "%s.%s.finished%s" % (base, mode, ext)
        if not pending:
            pending_name = "%s.%s.pending%s" % (base, mode, ext)

        # Put .pending and .finished file into respective places...
        pending_dest = os.path.join(self.storage, pending_name)
        finished_dest = os.path.join(finished_dir, finished_name)
        self._moveFile(finished_file, finished_dest)
        if pending_file is not None:
            self._moveFile(pending_file, pending_dest)

        # Put source file into final location...
        source_finished_dest = os.path.join(finished_dir, basename)
        source_unfinished_dest = os.path.join(unfinished_dir, basename)
        if successful and not pending:
            self._moveFile(source_path, source_finished_dest)
        elif successful and pending:
            self._moveFile(source_unfinished_dest, source_finished_dest)
            os.unlink(source_path)
        elif not successful and not pending:
            self._moveFile(source_path, source_unfinished_dest)

        # If finished and pending-file were created in a location
        # outside datacenter storage, we remove it. An empty dirname
        # (bare filename) denotes the current working directory,
        # which must never be removed.
        maybe_temp_dir = os.path.dirname(finished_file)
        if maybe_temp_dir and not self._isInStorage(maybe_temp_dir):
            shutil.rmtree(maybe_temp_dir)
        return
201
202   
class DataCenterFile(object):
    """A description of a file stored in data center.

    `context` is the filesystem path of the described file. Name,
    size, upload date and number of lines are determined once on
    creation.
    """
    grok.implements(IDataCenterFile)

    def __init__(self, context):
        self.context = context
        self.name = os.path.basename(self.context)
        self.size = self.getSize()
        self.uploaddate = self.getDate()
        self.lines = self.getLinesNumber()

    def getDate(self):
        """Get a human readable datetime representation.

        The value is derived from ``os.path.getctime`` of the file.
        """
        date = datetime.fromtimestamp(os.path.getctime(self.context))
        return date.strftime('%c')

    def getTimeStamp(self):
        """Get a (machine readable) timestamp.
        """
        return os.path.getctime(self.context)

    def getSize(self):
        """Get a human readable file size.

        Sizes below 1024 bytes are given in bytes, bigger ones in
        the largest fitting unit of kb, MB or GB with two decimals.
        """
        bytesize = os.path.getsize(self.context)
        size = "%s bytes" % bytesize
        units = ['kb', 'MB', 'GB']
        # Try the largest unit first.
        for power, unit in reversed(list(enumerate(units))):
            power += 1
            if bytesize >= 1024 ** power:
                size = "%.2f %s" % (bytesize/(1024.0**power), unit)
                break
        return size

    def getLinesNumber(self):
        """Get number of lines.
        """
        # Close the file explicitly instead of leaving an open handle
        # to the garbage collector.
        with open(self.context, 'rb') as fd:
            return sum(1 for line in fd)
246   
class LogFile(DataCenterFile):
    """A description of a log file.

    In addition to the data of a regular datacenter file, the values
    of some ``<name>:<text>`` markers found in the file are provided
    as attributes `userid`, `mode`, `stats` and `source`.
    """
    def __init__(self, context):
        super(LogFile, self).__init__(context)
        self._markers = dict()
        self._parsed = False
        self.userid = self.getUserId()
        self.mode = self.getMode()
        self.stats = self.getStats()
        self.source = self.getSourcePath()

    def _parseFile(self, maxline=10):
        """Find markers in a file.

        Every line containing a colon is split at the first colon;
        the lowercased left part becomes the marker name, the right
        part its text. If a name occurs several times, the last
        occurrence wins. The file is parsed only once.

        `maxline` is accepted but not evaluated: all lines of the
        file are scanned.
        """
        if self._parsed:
            return
        # Use a context manager to make sure the file gets closed.
        with open(self.context, 'rb') as fd:
            for line in fd:
                line = line.strip()
                if ':' not in line:
                    continue
                name, text = line.split(':', 1)
                self._markers[name.lower()] = text
        self._parsed = True
        return

    def _getMarker(self, marker):
        """Get the text of `marker` (case-insensitive) or ``None``.
        """
        if not self._parsed:
            self._parseFile()
        return self._markers.get(marker.lower())

    def getUserId(self):
        """Get text of the ``user`` marker or ``'<UNKNOWN>'``.
        """
        return self._getMarker('user') or '<UNKNOWN>'

    def getMode(self):
        """Get text of the ``mode`` marker or ``'<NOT SET>'``.
        """
        return self._getMarker('mode') or '<NOT SET>'

    def getStats(self):
        """Get text of the ``processed`` marker or a placeholder.
        """
        return self._getMarker('processed') or '<Info not avail.>'

    def getSourcePath(self):
        """Get text of the ``source`` marker or ``None``.
        """
        return self._getMarker('source') or None
291
292
class DataCenterStorageMovedEvent(ObjectEvent):
    """Event sent after the storage of a datacenter was relocated.

    The datacenter concerned is the object passed to the event.
    """
    grok.implements(IDataCenterStorageMovedEvent)
Note: See TracBrowser for help on using the repository browser.