source: main/waeup.sirp/branches/ulif-fasttables/src/waeup/sirp/datacenter.txt @ 11105

Last change on this file since 11105 was 5140, checked in by uli, 15 years ago

Update all unit tests that use the ZCA to run inside the new unit test layer.

File size: 7.8 KB
Line 
1WAeUP Data Center
2*****************
3
4The WAeUP data center cares for managing CSV files and importing then.
5
6.. :doctest:
7.. :layer: waeup.sirp.testing.WAeUPSIRPUnitTestLayer
8
9Creating a data center
10======================
11
12A data center can be created easily:
13
14    >>> from waeup.sirp.datacenter import DataCenter
15    >>> mydatacenter = DataCenter()
16    >>> mydatacenter
17    <waeup.sirp.datacenter.DataCenter object at 0x...>
18
19Each data center has a location in file system where files are stored:
20
21    >>> storagepath = mydatacenter.storage
22    >>> storagepath
23    '/.../waeup/sirp/files'
24
25
26Managing the storage path
27-------------------------
28
29We can set another storage path:
30
31    >>> import os
32    >>> os.mkdir('newlocation')
33    >>> newpath = os.path.abspath('newlocation')
34    >>> mydatacenter.setStoragePath(newpath)
35    []
36
37The result here is a list of filenames, that could not be
38copied. Luckily, this list is empty.
39
40When we set a new storage path, we can tell to move all files in the
41old location to the new one. To see this feature in action, we first
42have to put a file into the old location:
43
44    >>> open(os.path.join(newpath, 'myfile.txt'), 'wb').write('hello')
45
46Now we can set a new location and the file will be copied:
47
48    >>> verynewpath = os.path.abspath('verynewlocation')
49    >>> os.mkdir(verynewpath)
50
51    >>> mydatacenter.setStoragePath(verynewpath, move=True)
52    []
53
54    >>> storagepath = mydatacenter.storage
55    >>> 'myfile.txt' in os.listdir(verynewpath)
56    True
57
58We remove the created file to have a clean testing environment for
59upcoming examples:
60
61    >>> os.unlink(os.path.join(storagepath, 'myfile.txt'))
62
63Uploading files
64===============
65
66We can get a list of files stored in that location:
67
68    >>> mydatacenter.getFiles()
69    []
70
71Let's put some file in the storage:
72
73    >>> import os
74    >>> filepath = os.path.join(storagepath, 'data.csv')
75    >>> open(filepath, 'wb').write('Some Content\n')
76
77Now we can find a file:
78
79    >>> mydatacenter.getFiles()
80    [<waeup.sirp.datacenter.DataCenterFile object at 0x...>]
81
82As we can see, the actual file is wrapped by a convenience wrapper,
83that enables us to fetch some data about the file. The data returned
84is formatted in strings, so that it can easily be put into output
85pages:
86
87    >>> datafile = mydatacenter.getFiles()[0]
88    >>> datafile.getSize()
89    '13 bytes'
90
91    >>> datafile.getDate() # Nearly current datetime...
92    '...'
93
94Clean up:
95
96    >>> import shutil
97    >>> shutil.rmtree(newpath)
98    >>> shutil.rmtree(verynewpath)
99
100
101Distributing processed files
102============================
103
104When files were processed by a batch processor, we can put the
105resulting files into desired destinations.
106
107We recreate the datacenter root in case it is missing:
108
109    >>> import os
110    >>> dc_root = mydatacenter.storage
111    >>> fin_dir = os.path.join(dc_root, 'finished')
112    >>> unfin_dir = os.path.join(dc_root, 'unfinished')
113
114    >>> def recreate_dc_storage():
115    ...   if os.path.exists(dc_root):
116    ...     shutil.rmtree(dc_root)
117    ...   os.mkdir(dc_root)
118    ...   mydatacenter.setStoragePath(mydatacenter.storage)
119    >>> recreate_dc_storage()
120
121We define a function that creates a set of faked result files:
122
123    >>> import os
124    >>> import tempfile
125    >>> def create_fake_results(source_basename, create_pending=True):
126    ...   tmp_dir = tempfile.mkdtemp()
127    ...   src = os.path.join(dc_root, source_basename)
128    ...   pending_src = None
129    ...   if create_pending:
130    ...     pending_src = os.path.join(tmp_dir, 'mypendingsource.csv')
131    ...   finished_src = os.path.join(tmp_dir, 'myfinishedsource.csv')
132    ...   for path in (src, pending_src, finished_src):
133    ...     if path is not None:
134    ...       open(path, 'wb').write('blah')
135    ...   return tmp_dir, src, finished_src, pending_src
136
137Now we can create the set of result files, that typically come after a
138successful processing of a regular source:
139
140Now we can try to distribute those files. Let's start with a source
141file, that was processed successfully:
142
143    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
144    ...  'mysource.csv', create_pending=False)
145    >>> mydatacenter.distProcessedFiles(True, src, finished_src,
146    ...                            pending_src, mode='create')
147    >>> sorted(os.listdir(dc_root))
148    ['finished', 'logs', 'unfinished']
149
150    >>> sorted(os.listdir(fin_dir))
151    ['mysource.create.finished.csv', 'mysource.csv']
152
153    >>> sorted(os.listdir(unfin_dir))
154    []
155
156The created dir will be removed for us by the datacenter. This way we
157can assured, that less temporary dirs are left hanging around:
158
159    >>> os.path.exists(tmp_dir)
160    False
161
162The root dir is empty, while the original file and the file containing
163all processed data were moved to'finished/'.
164
165Now we restart, but this time we fake an erranous action:
166
167    >>> recreate_dc_storage()
168    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
169    ...  'mysource.csv')
170    >>> mydatacenter.distProcessedFiles(False, src, finished_src,
171    ...                                 pending_src, mode='create')
172    >>> sorted(os.listdir(dc_root))
173    ['finished', 'logs', 'mysource.create.pending.csv', 'unfinished']
174
175    >>> sorted(os.listdir(fin_dir))
176    ['mysource.create.finished.csv']
177
178    >>> sorted(os.listdir(unfin_dir))
179    ['mysource.csv']
180
181While the original source was moved to the 'unfinished' dir, the
182pending file went to the root and the set of already processed items
183are stored in finished/.
184
185We fake processing the pending file and assume that everything went
186well this time:
187
188    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
189    ...  'mysource.create.pending.csv', create_pending=False)
190    >>> mydatacenter.distProcessedFiles(True, src, finished_src,
191    ...                                 pending_src, mode='create')
192
193    >>> sorted(os.listdir(dc_root))
194    ['finished', 'logs', 'unfinished']
195
196    >>> sorted(os.listdir(fin_dir))
197    ['mysource.create.finished.csv', 'mysource.csv']
198
199    >>> sorted(os.listdir(unfin_dir))
200    []
201
202The result is the same as in the first case shown above.
203
204We restart again, but this time we fake several non-working imports in
205a row.
206
207We start with a faulty start-import:
208
209    >>> recreate_dc_storage()
210    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
211    ...  'mysource.csv')
212    >>> mydatacenter.distProcessedFiles(False, src, finished_src,
213    ...                                 pending_src, mode='create')
214
215We try to process the pending file, which fails again:
216
217    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
218    ...  'mysource.create.pending.csv')
219    >>> mydatacenter.distProcessedFiles(False, src, finished_src,
220    ...                                 pending_src, mode='create')
221
222We try to process the new pending file:
223
224    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
225    ...  'mysource.create.pending.csv')
226    >>> mydatacenter.distProcessedFiles(False, src, finished_src,
227    ...                                 pending_src, mode='create')
228
229    >>> sorted(os.listdir(dc_root))
230    ['finished', 'logs', 'mysource.create.pending.csv', 'unfinished']
231
232    >>> sorted(os.listdir(fin_dir))
233    ['mysource.create.finished.csv']
234
235    >>> sorted(os.listdir(unfin_dir))
236    ['mysource.csv']
237
238Finally, we process the pending file and everything works:
239
240    >>> tmp_dir, src, finished_src, pending_src = create_fake_results(
241    ...  'mysource.create.pending.csv', create_pending=False)
242    >>> mydatacenter.distProcessedFiles(True, src, finished_src,
243    ...                                 pending_src, mode='create')
244
245    >>> sorted(os.listdir(dc_root))
246    ['finished', 'logs', 'unfinished']
247
248    >>> sorted(os.listdir(fin_dir))
249    ['mysource.create.finished.csv', 'mysource.csv']
250
251    >>> sorted(os.listdir(unfin_dir))
252    []
253
254The root dir is empty (contains no input files) and only the files in
255finished-subdirectory remain.
256
257Clean up:
258
259    >>> shutil.rmtree(verynewpath)
Note: See TracBrowser for help on using the repository browser.