source: main/waeup.uniben/trunk/src/waeup/uniben/scripts.py @ 14086

Last change on this file since 14086 was 13449, checked in by uli, 9 years ago

Scripts for orphaned beds.

As it turns out, we can change a local ZODB from the commandline. And
it is even not too complicated.

File size: 7.5 KB
Line 
"""The export() function herein is called by the kofaexport script.

To make this work, you have to pip install psutil in your local virtualenv.

Other functions can be called from the commandline. These comprise:

  - `bedless_students`
  - `remove_orphan_beds`

Once you have installed uniben, you can use `bin/python-console`::


  $ ./bin/python-console
  >>> from waeup.uniben.scripts import bedless_students
  >>> studs = [x for x in bedless_students()]
  ...  lots of output ...
  ...  the cmd takes some time to finish ...
  ...  please be patient ...
  >>> len(studs)
  196

etc. Use Ctrl-D to quit.

"""
25import argparse
26import gc
27import grok
28import os
29import tempfile
30import time
31import transaction
32from ZODB import DB, DemoStorage, FileStorage
33from ZODB.blob import BlobStorage
34from zope.component import getUtility
35from waeup.kofa.interfaces import ICSVExporter
36
37
#: Bookkeeping of the packages that were grokked already. Keys are
#: package names, values are `True` once grokking was started.
grokked = {}


#: Packages that have to be 'grokked' in order to register exporters
#: and everything else.
#: The order of these packages is usually relevant.
TO_BE_GROKKED = (
    "waeup.kofa",
    "kofacustom.nigeria",
    "waeup.uniben",
)

#: The data root from where we can look out for Data.fs, etc. This is
#: normally the `var/` dir of an instance. It is located four
#: directory levels above this file.
VAR_DIR = os.path.join(
    os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
    'var')

#: The name of the `University` instance in which we look for export
#: items.
APP_NAME = "uniben"

#: The name under which the exporter to use is registered.
EXPORTER_NAME = "students"
58
59
def handle_options():
    """Parse the commandline and return the resulting namespace.

    No options of our own are defined (yet). The parser is built with
    `argparse` defaults and reads the arguments of the running process.
    """
    return argparse.ArgumentParser(
        description="Export WAeUP kofa data").parse_args()
67
68
def grok_bases():
    """Grok each package from `TO_BE_GROKKED` that was not handled yet.

    Packages are processed in the order given by `TO_BE_GROKKED`. A
    package is recorded in `grokked` right before it is grokked, so
    it is skipped on any later call.
    """
    for package in TO_BE_GROKKED:
        if not grokked.get(package, False):
            print("Grokking %s..." % package)
            grokked[package] = True
            grok.testing.grok(package)
            print("Done.")
79
80
def init_dbs(read_only=True):
    """Open the instance's storages and databases.

    The files are looked up below `VAR_DIR`: ``filestorage/Data.fs``,
    ``filestorage/Data.async.fs`` and the ``blobstorage`` directory.
    `read_only` is passed on to both file storages.

    Returns a tuple of `closables` that can be handed to `close_dbs()`
    when you are done. The first element is the main database; call
    its ``open()`` to get a new connection to the ZODB.

    As a side effect all packages in `TO_BE_GROKKED` are grokked.
    """
    # Both databases share one `databases` mapping. See
    # https://github.com/zopefoundation/ZODB/\
    #         blob/master/src/ZODB/cross-database-references.txt
    registry = {}
    storage_dir = os.path.join(VAR_DIR, 'filestorage')
    blobs_path = os.path.join(VAR_DIR, 'blobstorage')
    main_path = os.path.join(storage_dir, 'Data.fs')
    async_path = os.path.join(storage_dir, 'Data.async.fs')

    # The "async" database is set up first, then the main one.
    async_storage = FileStorage.FileStorage(async_path, read_only=read_only)
    async_db = DB(async_storage, database_name="async", databases=registry)

    base_storage = FileStorage.FileStorage(main_path, read_only=read_only)
    blob_storage = BlobStorage(blobs_path, base_storage)
    main_db = DB(blob_storage, databases=registry)

    # NOTE(review): the connection returned here is not kept; callers
    # open their own connections via the returned main database.
    main_db.open()
    grok_bases()
    return (main_db, async_db, base_storage, blob_storage, async_storage)
105
106
def close_dbs(closables):
    """Call ``close()`` on each item of `closables`, in order.

    Progress is reported on stdout for each item. `closables` is
    typically the tuple returned by `init_dbs()`.
    """
    for closable in closables:
        print("Closing %s..." % closable)
        closable.close()
        print("Done.")
114
115
def get_university(conn):
    """Get the university instance named `APP_NAME`.

    `conn` is an open connection; the instance is looked up in the
    ``Application`` entry of the connection's root object.
    """
    applications = conn.root()["Application"]
    return applications[APP_NAME]
118
119
def get_all_students(container, cnt_from=0, cnt_to=0):
    """Yield the elements of `container` in an index range.

    Keys are taken in iteration order of `container` and numbered
    starting with zero. Elements with index `cnt_from` up to and
    including `cnt_to` are yielded. A `cnt_to` of ``0`` (the default)
    means: no upper limit.

    Values are fetched with ``container.get(key)`` one at a time and
    no reference to them is kept here after they were consumed.
    """
    for index, key in enumerate(container):
        if index < cnt_from:
            continue
        if cnt_to and index > cnt_to:
            break
        yield container.get(key)
132
133
def partition(container, part_size=10000):
    """Compute index ranges that split `container` into chunks.

    Returns a list of triples ``(<num>, <index_start>, <index_end>)``,
    one per chunk. Each chunk covers at most `part_size` elements;
    both indexes are zero-based and inclusive.

    `container` must support `len()`. Its length is also printed.

    A container of size 250 and a `part_size` of 100 results in:

      [(0,   0,  99),
       (1, 100, 199),
       (2, 200, 249),
       ]

    """
    total = len(container)
    print("Container elements: %s" % total)
    last_index = total - 1
    chunks = []
    for first in range(0, total, part_size):
        # the last chunk might be shorter than `part_size`
        final = min(first + part_size - 1, last_index)
        chunks.append((len(chunks), first, final))
    return chunks
158
159
def get_mem_info():
    """Get the resident set size (RSS) of the current process.

    This works only, if `psutil` is installed locally (in virtualenv).
    Otherwise we return `None`.
    """
    try:
        # late import. We do not want to make it a waeup.uniben dependency.
        import psutil
    except ImportError:
        return None
    proc = psutil.Process(os.getpid())
    # psutil 2.0 renamed `get_memory_info()` to `memory_info()` and
    # later releases dropped the old name. Prefer the new name, but
    # keep working with old psutil installations.
    read_info = getattr(proc, 'memory_info', None)
    if read_info is None:
        read_info = proc.get_memory_info
    return read_info().rss
173
174
def export_part(container, part_num, start, end, path):
    """Export one chunk of `container` to the file `path`.

    `start` and `end` are the zero-based index numbers of the first
    and last element to export; they are passed on to
    `get_all_students()`. `part_num` is the number of the chunk; it is
    not used in here.

    The export is done by the `ICSVExporter` utility registered under
    `EXPORTER_NAME`. Memory info (if available) and the time needed
    are printed.
    """
    # Collect garbage first, so the memory number printed is not
    # blurred by leftovers.
    gc.collect()
    mem_before = get_mem_info()
    print("  Export %s-%s to %s (mem: %s)" % (start, end, path, mem_before))
    started = time.time()
    students = get_all_students(container, start, end)
    getUtility(ICSVExporter, name=EXPORTER_NAME).export(students, path)
    finished = time.time()
    mem_after = get_mem_info()
    print("  Done (%s secs, mem: %s)" % (finished - started, mem_after))
191
192
def export():
    """Main function.

    Opens the local databases read-only and exports students in chunks
    to CSV files ``myexport<num>.csv`` in a freshly created temporary
    directory. The directory path and the time elapsed are printed.

    Connections and databases are closed when we are done, also if
    the export fails.
    """
    # We define no options yet. The call is kept, because it handles
    # the commandline (``--help``, unknown arguments).
    handle_options()
    closables = init_dbs()
    try:
        conn = closables[0].open()
        try:
            # The result is not used (see below), but the call tells the
            # number of elements in the container.
            partition(get_university(conn)['students'])
        finally:
            conn.close()  # every loop will reopen the connection

        # NOTE(review): the chunks computed by `partition()` are not
        # used. Only elements 0-100 are exported. This looks like a
        # leftover from testing -- confirm, before exporting all chunks.
        parts = [(0, 0, 100), ]

        workdir = tempfile.mkdtemp()

        t1 = time.time()
        for p_num, start, end in parts:
            conn = closables[0].open()
            try:
                studs = get_university(conn)['students']
                curr_path = os.path.join(workdir, "myexport%s.csv" % p_num)
                export_part(studs, p_num, start, end, curr_path)
            finally:
                conn.close()
        print("Result in %s" % workdir)
        t2 = time.time()
        print("Elapsed: %s secs" % (t2 - t1))
    finally:
        close_dbs(closables)
223
224
def bedless_students(university_inst=None):
    """Get students with beds gone.

    Generator. Yields triples ``(<student id>, <bed ticket>, <bed>)``
    for each bed ticket that refers to a bed whose `__parent__` or
    whose parent's `__parent__` is missing or `None`.

    If no `university_inst` is given, we open the local databases
    (writable) and look up the university ourselves. In that case the
    connection and the databases are closed when the generator
    finishes, is closed, or fails. A `university_inst` passed in is
    left untouched.
    """
    conn, closeables = None, None
    try:
        if university_inst is None:
            closeables = init_dbs(read_only=False)
            conn = closeables[0].open()
            university_inst = get_university(conn)
        students = university_inst['students']
        for stud_id in students:
            stud = students[stud_id]
            if 'accommodation' not in stud.keys():
                continue
            accomm = stud['accommodation']
            for bed_ticket_id in accomm.keys():
                ticket = accomm[bed_ticket_id]
                bed = ticket.bed
                if bed is None:
                    continue
                # A bed is considered orphaned, if we cannot walk up
                # two levels from it.
                bed_parent = getattr(bed, '__parent__', None)
                if getattr(bed_parent, '__parent__', None) is None:
                    yield stud_id, ticket, bed
    finally:
        # Only clean up what we opened ourselves.
        if conn is not None:
            conn.close()
        if closeables is not None:
            close_dbs(closeables)
250
251
def remove_orphan_beds():
    """Delete orphaned beds.

    Opens the local databases (writable), sets the `bed` of each bed
    ticket reported by `bedless_students()` to `None` and commits the
    changes. Each student handled is reported on stdout.

    Connection and databases are closed in any case. If something
    fails before the commit, the changes done so far are discarded.
    """
    closeables = init_dbs(read_only=False)
    try:
        conn = closeables[0].open()
        try:
            uni = get_university(conn)
            for stud_id, bed_ticket, _bed in bedless_students(uni):
                print("DELETE bed of student %s" % stud_id)
                bed_ticket.bed = None
            transaction.commit()
        finally:
            # Discards pending changes, if we did not get to the commit
            # above; after a successful commit there is nothing pending.
            # We do this before closing the connection, so the
            # connection is not closed with modifications pending.
            transaction.abort()
            conn.close()
    finally:
        close_dbs(closeables)
Note: See TracBrowser for help on using the repository browser.