1 | """The export() function herein is called by the kofaexport script. |
---|
2 | |
---|
3 | To make this work, you have to pip install psutil in your local virtualenv. |
---|
4 | |
---|
5 | Other functions can be called from the commandline. These comprise: |
---|
6 | |
---|
7 | - `bedless_students` |
---|
8 | - `remove_orphan_beds` |
---|
9 | |
---|
10 | Once you have installed uniben, you can use `bin/python-console`:: |
---|
11 | |
---|
12 | |
---|
13 | $ ./bin/python-console |
---|
14 | >>> from waeup.uniben.scripts import bedless_students |
---|
15 | >>> studs = [x for x in bedless_students()] |
---|
16 | ... lots of output ... |
---|
17 | ... the cmd takes some time to finish ... |
---|
18 | ... please be patient ... |
---|
19 | >>> len(studs) |
---|
20 | 196 |
---|
21 | |
---|
22 | etc. Use Ctrl-D to quit. |
---|
23 | |
---|
24 | """ |
---|
25 | import argparse |
---|
26 | import gc |
---|
27 | import grok |
---|
28 | import os |
---|
29 | import tempfile |
---|
30 | import time |
---|
31 | import transaction |
---|
32 | from ZODB import DB, DemoStorage, FileStorage |
---|
33 | from ZODB.blob import BlobStorage |
---|
34 | from zope.component import getUtility |
---|
35 | from waeup.kofa.interfaces import ICSVExporter |
---|
36 | |
---|
37 | |
---|
#: Registry of package names that were grokked already (name -> True).
grokked = {}


#: Names of the packages that must be 'grokked' so that exporters and
#: all other components get registered.
#: The order of these packages is usually relevant.
TO_BE_GROKKED = (
    "waeup.kofa",
    "kofacustom.nigeria",
    "waeup.uniben",
)

#: Root directory in which we look for Data.fs and friends: the `var`
#: directory located four directory levels above this file. This is
#: normally the `var/` dir of an instance.
VAR_DIR = os.path.join(
    os.path.dirname(
        os.path.dirname(
            os.path.dirname(
                os.path.dirname(__file__)))),
    'var')

#: Name of the `University` instance in which export items are looked up.
APP_NAME = "uniben"

#: Registration name of the exporter to use.
EXPORTER_NAME = "students"
---|
58 | |
---|
59 | |
---|
def handle_options():
    """Parse the commandline and return the resulting namespace.

    No options are defined beyond what `argparse` provides by itself
    (i.e. ``-h``/``--help``).
    """
    return argparse.ArgumentParser(
        description="Export WAeUP kofa data").parse_args()
---|
67 | |
---|
68 | |
---|
def grok_bases():
    """Grok each package listed in `TO_BE_GROKKED` at most once.

    Packages already marked in the module-level `grokked` registry are
    skipped. A package is marked before it is actually grokked.
    """
    for package in TO_BE_GROKKED:
        if not grokked.get(package, False):
            print("Grokking %s..." % package)
            grokked[package] = True
            grok.testing.grok(package)
            print("Done.")
---|
79 | |
---|
80 | |
---|
def init_dbs(read_only=True):
    """Setup databases.

    Opens the file storages `Data.fs` and `Data.async.fs` located in
    ``<VAR_DIR>/filestorage``, wraps the former into a blob storage
    using ``<VAR_DIR>/blobstorage`` and creates one database for each
    of them. Both databases are registered in the same `databases`
    mapping. Afterwards the required packages are grokked via
    `grok_bases()`.

    `read_only` is passed on to both file storages.

    We return a sequence of `closables`. The closables can be passed
    to close_dbs() if you're done.

    The first of the elements returned is the main database. open()
    it for a new connection to the ZODB.
    """
    # https://github.com/zopefoundation/ZODB/\
    # blob/master/src/ZODB/cross-database-references.txt
    # Shared registry handed to both DB() calls below; presumably
    # needed for cross-database references (see link above).
    databases = {}
    db_dir = os.path.join(VAR_DIR, 'filestorage')
    blob_dir = os.path.join(VAR_DIR, 'blobstorage')
    db_path = os.path.join(db_dir, 'Data.fs')
    async_db_path = os.path.join(db_dir, 'Data.async.fs')
    async_storage = FileStorage.FileStorage(async_db_path, read_only=read_only)
    db1 = DB(async_storage, database_name="async", databases=databases)
    base_storage = FileStorage.FileStorage(db_path, read_only=read_only)
    blob_storage = BlobStorage(blob_dir, base_storage)
    db2 = DB(blob_storage, databases=databases)
    # NOTE(review): the connection returned here is not kept; callers
    # open their own connection via ``closables[0].open()``.
    db2.open()
    grok_bases()
    # Main database first, then the async database and the raw storages.
    return (db2, db1, base_storage, blob_storage, async_storage)
---|
105 | |
---|
106 | |
---|
def close_dbs(closables):
    """Call ``close()`` on each item of `closables`, in the given order.

    Progress is reported on stdout for every item.
    """
    for closable in closables:
        print("Closing %s..." % closable)
        closable.close()
        print("Done.")
---|
114 | |
---|
115 | |
---|
def get_university(conn):
    """Return the object stored under `APP_NAME` in the "Application"
    entry of the root of connection `conn`.
    """
    application = conn.root()["Application"]
    return application[APP_NAME]
---|
118 | |
---|
119 | |
---|
def get_all_students(container, cnt_from=0, cnt_to=0):
    """Yield the values of `container` for a range of key positions.

    Keys are taken in iteration order of `container`; the respective
    values are fetched with ``container.get(key)``. Values at the
    zero-based positions `cnt_from` to `cnt_to` (both inclusive) are
    yielded. A false `cnt_to` (the default ``0``) means: no upper
    limit.
    """
    for position, key in enumerate(container):
        if position < cnt_from:
            # not yet in the requested range
            continue
        if cnt_to and position > cnt_to:
            # beyond the requested range
            break
        yield container.get(key)
---|
132 | |
---|
133 | |
---|
def partition(container, part_size=10000):
    """Compute index ranges that split `container` into chunks.

    Returns a list of triples ``(<num>, <index_start>, <index_end>)``,
    one per chunk. Each chunk covers at most `part_size` elements;
    both indexes are zero-based and inclusive.

    The `container` object must support `len()`. Its size is printed
    to stdout.

    For instance a container of size 250 and `part_size` 100 would give:

      [(0, 0, 99),
       (1, 100, 199),
       (2, 200, 249),
      ]

    """
    total = len(container)
    print("Container elements: %s" % total)
    last_index = total - 1
    chunks = []
    for first in range(0, total, part_size):
        # the final chunk may be shorter than `part_size`
        last = min(first + part_size - 1, last_index)
        chunks.append((len(chunks), first, last))
    return chunks
---|
158 | |
---|
159 | |
---|
def get_mem_info():
    """Get current memory info.

    Returns the resident set size (``rss``) of the current process as
    reported by `psutil`.

    This works only, if `psutil` is installed locally (in virtualenv).
    Otherwise we return `None`.
    """
    try:
        # late import. We do not want to make it a waeup.uniben dependency.
        import psutil
    except ImportError:
        return None
    proc = psutil.Process(os.getpid())
    # psutil 2.0 renamed `get_memory_info()` to `memory_info()` and later
    # releases dropped the old name. Prefer the new name, fall back to the
    # legacy one for very old installations.
    mem_info = getattr(proc, 'memory_info', None)
    if mem_info is None:
        mem_info = proc.get_memory_info
    return mem_info().rss
---|
173 | |
---|
174 | |
---|
def export_part(container, part_num, start, end, path):
    """Write the elements `start` to `end` of `container` to `path`.

    `path` is the filesystem path of the file to create. `start` and
    `end` are zero-based index numbers of the elements to export, as
    understood by `get_all_students()`. `part_num` is the number of
    the exported part; it is currently not evaluated here.

    Duration and memory usage (if available) are printed to stdout.
    """
    gc.collect()
    mem_before = get_mem_info()
    print(" Export %s-%s to %s (mem: %s)" % (start, end, path, mem_before))
    started = time.time()
    students = get_all_students(container, start, end)
    getUtility(ICSVExporter, name=EXPORTER_NAME).export(students, path)
    elapsed = time.time() - started
    mem_after = get_mem_info()
    print(" Done (%s secs, mem: %s)" % (elapsed, mem_after))
---|
191 | |
---|
192 | |
---|
def export():
    """Main function.

    Exports all students of the university in chunks, one CSV file per
    chunk, into a freshly created temporary directory. The directory
    and the overall duration are printed to stdout.

    The chunks are those computed by `partition()`. A new ZODB
    connection is opened for every chunk and closed afterwards. All
    databases are closed at the end, also if an error occurred.
    """
    # Parse the commandline (provides `--help`); no options are
    # evaluated yet.
    handle_options()
    closables = init_dbs()
    main_db = closables[0]
    try:
        conn = main_db.open()
        try:
            parts = partition(get_university(conn)['students'])
        finally:
            conn.close()  # every loop will reopen the connection

        workdir = tempfile.mkdtemp()

        t1 = time.time()
        for p_num, start, end in parts:
            conn = main_db.open()
            try:
                studs = get_university(conn)['students']
                curr_path = os.path.join(workdir, "myexport%s.csv" % p_num)
                export_part(studs, p_num, start, end, curr_path)
            finally:
                conn.close()
        print("Result in %s" % workdir)
        t2 = time.time()
        print("Elapsed: %s secs" % (t2 - t1))
    finally:
        close_dbs(closables)
---|
223 | |
---|
224 | |
---|
def bedless_students(university_inst=None):
    """Get students with beds gone.

    This is a generator. It yields a triple ``(<student_id>,
    <bed_ticket>, <bed>)`` for each bed ticket found in the
    'accommodation' container of a student, whose bed is set but has
    no ``__parent__.__parent__`` (missing or `None`).

    `university_inst` is the university to search. If it is `None`, we
    open the databases ourselves (writable) and look up the university
    there. In that case connection and databases are closed when the
    generator finishes, fails, or is closed/discarded before being
    exhausted.
    """
    conn, closeables = None, None
    try:
        if university_inst is None:
            closeables = init_dbs(read_only=False)
            conn = closeables[0].open()
            university_inst = get_university(conn)
        students = university_inst['students']
        for stud_id in students:
            stud = students[stud_id]
            if 'accommodation' not in stud.keys():
                continue
            accomm = stud['accommodation']
            for bed_ticket_id in accomm.keys():
                ticket = accomm[bed_ticket_id]
                bed = ticket.bed
                if bed is None:
                    continue
                # A bed counts as gone if it has no grandparent.
                grandparent = getattr(
                    getattr(bed, '__parent__', None), '__parent__', None)
                if grandparent is None:
                    yield stud_id, ticket, bed
    finally:
        # Only clean up what we opened ourselves.
        if conn is not None:
            conn.close()
        if closeables is not None:
            close_dbs(closeables)
---|
250 | |
---|
251 | |
---|
def remove_orphan_beds():
    """Delete orphaned beds.

    Opens the databases writable and sets the `bed` attribute of every
    bed ticket reported by `bedless_students()` to `None`. All changes
    are committed in one transaction at the end.

    If anything fails, the transaction is aborted and the error is
    re-raised. Connection and databases are closed in any case.
    """
    closeables = init_dbs(read_only=False)
    try:
        conn = closeables[0].open()
        try:
            uni = get_university(conn)
            for stud_id, bed_ticket, _bed in bedless_students(uni):
                print("DELETE bed of student %s" % stud_id)
                bed_ticket.bed = None
            transaction.commit()
        except Exception:
            # Drop pending changes, so that the connection can be closed.
            transaction.abort()
            raise
        finally:
            conn.close()
    finally:
        close_dbs(closeables)
---|