source: main/waeup.uniben/trunk/src/waeup/uniben/scripts.py @ 16620

Last change on this file since 16620 was 13449, checked in by uli, 9 years ago

Scripts for orphaned beds.

As it turns out, we can change a local ZODB from the commandline. And
it is even not too complicated.

File size: 7.5 KB
RevLine 
"""The export() function herein is called by the kofaexport script.

To make this work, you have to pip install psutil in your local virtualenv.

Other functions can be called from the commandline. These comprise:

  - `bedless_students`
  - `remove_orphan_beds`

Once you have installed uniben, you can use `bin/python-console`::


  $ ./bin/python-console
  >>> from waeup.uniben.scripts import bedless_students
  >>> studs = [x for x in bedless_students()]
  ...  lots of output ...
  ...  the cmd takes some time to finish ...
  ...  please be patient ...
  >>> len(studs)
  196

etc. Use Ctrl-D to quit.

"""
[13210]25import argparse
[13190]26import gc
27import grok
28import os
29import tempfile
30import time
[13449]31import transaction
[13190]32from ZODB import DB, DemoStorage, FileStorage
33from ZODB.blob import BlobStorage
34from zope.component import getUtility
35from waeup.kofa.interfaces import ICSVExporter
36
37
#: Registry of packages that were grokked already. `grok_bases()`
#: stores ``True`` under the package name for each package it handled.
grokked = {}


#: Names of the packages `grok_bases()` has to grok so that exporters
#: (and everything else) get registered.
#: The order of these packages is usually relevant.
TO_BE_GROKKED = ("waeup.kofa", "kofacustom.nigeria", "waeup.uniben")

#: Directory below which we look for Data.fs, blobs, etc. It is the
#: `var` directory located four directory levels above this file, which
#: is normally the `var/` dir of an instance.
VAR_DIR = os.path.join(
    os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
    'var')

#: Key of the `University` instance (below the ZODB "Application"
#: entry) in which we look for export items.
APP_NAME = "uniben"

#: Registration name of the `ICSVExporter` utility used for exporting.
EXPORTER_NAME = "students"
[13203]58
[13209]59
def handle_options(argv=None):
    """Handle commandline options.

    `argv` is the list of argument strings to parse (without the
    program name). If it is `None` (the default), the arguments are
    taken from `sys.argv`, which is what a console script wants.
    Passing an explicit list allows calling this function from other
    code or from tests.

    No options are defined yet; `--help` is handled by `argparse` and
    unknown arguments make `argparse` exit with an error message.

    Returns the `argparse.Namespace` with the parsed options.
    """
    parser = argparse.ArgumentParser(
        description="Export WAeUP kofa data")
    return parser.parse_args(argv)
67
68
def grok_bases():
    """Grok all packages from `TO_BE_GROKKED` that were not handled yet.

    Packages are processed in the order given in `TO_BE_GROKKED`. Each
    package is recorded in the module-level `grokked` registry before
    it is actually grokked, so it is not tried again by later calls.
    """
    for package in TO_BE_GROKKED:
        already_done = grokked.get(package, False)
        if not already_done:
            print("Grokking %s..." % package)
            # Mark first, grok afterwards (same order as ever).
            grokked[package] = True
            grok.testing.grok(package)
            print("Done.")
79
80
def init_dbs(read_only=True):
    """Set up the databases found below `VAR_DIR`.

    `read_only` is passed on to both file storages (`Data.fs` and
    `Data.async.fs`).

    Returns a tuple of `closables`: the main database, the async
    database, and the three storages in use. Pass the tuple to
    `close_dbs()` when you are done.

    The first element returned is the main database. Call `open()`
    on it to get a new connection to the ZODB.

    As a side effect `grok_bases()` is called.
    """
    filestorage_dir = os.path.join(VAR_DIR, 'filestorage')
    main_path = os.path.join(filestorage_dir, 'Data.fs')
    async_path = os.path.join(filestorage_dir, 'Data.async.fs')
    blobs_path = os.path.join(VAR_DIR, 'blobstorage')

    # Both databases are registered in the same `databases` mapping. See
    # https://github.com/zopefoundation/ZODB/blob/master/src/ZODB/cross-database-references.txt
    shared_dbs = {}
    async_storage = FileStorage.FileStorage(async_path, read_only=read_only)
    async_db = DB(async_storage, database_name="async", databases=shared_dbs)
    base_storage = FileStorage.FileStorage(main_path, read_only=read_only)
    blob_storage = BlobStorage(blobs_path, base_storage)
    main_db = DB(blob_storage, databases=shared_dbs)
    # NOTE(review): this connection is opened and then discarded; the
    # reason is not visible in this module.
    main_db.open()
    grok_bases()
    return (main_db, async_db, base_storage, blob_storage, async_storage)
105
106
def close_dbs(closables):
    """Call `close()` on each element of `closables`, in the given order.

    Progress is reported on stdout for every element.
    """
    for closable in closables:
        announcement = "Closing %s..." % closable
        print(announcement)
        closable.close()
        print("Done.")
114
115
def get_university(conn):
    """Get the object stored under `APP_NAME` via connection `conn`.

    The object is looked up in the "Application" entry of the
    connection's root object.
    """
    application = conn.root()["Application"]
    return application[APP_NAME]
[13190]118
119
def get_all_students(container, cnt_from=0, cnt_to=0):
    """Yield the values of `container` in a range of positions.

    Positions are zero-based and follow the iteration order of
    `container`. Elements at positions `cnt_from` to `cnt_to` (both
    inclusive) are yielded; each value is fetched with
    `container.get(key)`.

    A `cnt_to` of zero (the default) means: no upper limit.
    """
    for position, key in enumerate(container):
        if position < cnt_from:
            # Still in front of the requested range.
            continue
        if cnt_to and position > cnt_to:
            # Beyond the requested range; no need to look any further.
            break
        # Yield directly, so we do not keep an own reference to the value.
        yield container.get(key)
132
133
def partition(container, part_size=10000):
    """Compute chunk boundaries for `container`.

    Returns a list of triples (<num>, <index_start>, <index_end>), one
    for each chunk of at most `part_size` elements. `<num>` is the
    running number of the chunk, the indexes are zero-based and
    inclusive.

    Only the length of `container` is used, so it must support `len()`.
    The number of elements is also printed to stdout.

    For instance a container of size 250 and `part_size` 100 gives:

      [(0,   0,  99),
       (1, 100, 199),
       (2, 200, 249),
       ]

    """
    total = len(container)
    print("Container elements: %s" % total)
    last_index = total - 1
    chunks = []
    for start in range(0, total, part_size):
        # The last chunk might be shorter than `part_size`.
        end = min(start + part_size - 1, last_index)
        chunks.append((len(chunks), start, end))
    return chunks
[13202]158
159
def get_mem_info():
    """Get current memory info.

    Returns the `rss` value psutil reports for the current process.

    This works only, if `psutil` is installed locally (in virtualenv).
    Otherwise we return `None`.
    """
    try:
        # late import. We do not want to make it a waeup.uniben dependency.
        import psutil
    except ImportError:
        return None
    proc = psutil.Process(os.getpid())
    try:
        # psutil >= 2.0
        read_memory_info = proc.memory_info
    except AttributeError:
        # psutil < 2.0 only knows the old name (which was removed again
        # in psutil 3.0).
        read_memory_info = proc.get_memory_info
    return read_memory_info().rss
173
174
def export_part(container, part_num, start, end, path):
    """Export elements `start` to `end` of `container` to `path`.

    `path` is the filesystem path of the file we export to. `start`
    and `end` are the zero-based index numbers of the first and last
    element to export; they are handed to `get_all_students()`.

    `part_num` is the number of the part; it is not used currently.

    The export is done by the `ICSVExporter` utility registered under
    `EXPORTER_NAME`. Time needed and memory info (if available) are
    printed to stdout.
    """
    # Collect garbage first, so the memory values are somewhat meaningful.
    gc.collect()
    mem_before = get_mem_info()
    print("  Export %s-%s to %s (mem: %s)" % (start, end, path, mem_before))
    started = time.time()
    csv_exporter = getUtility(ICSVExporter, name=EXPORTER_NAME)
    selected = get_all_students(container, start, end)
    csv_exporter.export(selected, path)
    elapsed = time.time() - started
    mem_after = get_mem_info()
    print("  Done (%s secs, mem: %s)" % (elapsed, mem_after))
191
192
def export():
    """Main function.

    Opens the databases read-only, exports the `students` container of
    the university part by part into CSV files (`myexport<num>.csv`)
    in a freshly created temporary directory and prints the location
    of the result and the time elapsed.

    Each part is exported with a connection of its own. Connections
    and databases are closed in any case, also if the export fails.
    """
    # No options are defined yet, but `--help` and unknown arguments
    # are handled for us.
    handle_options()
    closables = init_dbs()
    try:
        main_db = closables[0]

        conn = main_db.open()
        try:
            parts = partition(get_university(conn)['students'])
        finally:
            conn.close()  # every loop will reopen the connection

        # NOTE(review): the partitions computed above are overridden by
        # a hard-coded range, so only the elements 0-100 are exported.
        # This looks like a debugging leftover; remove the next line to
        # export all partitions.
        parts = [(0, 0, 100), ]

        workdir = tempfile.mkdtemp()

        t1 = time.time()
        for p_num, start, end in parts:
            conn = main_db.open()
            try:
                studs = get_university(conn)['students']
                curr_path = os.path.join(workdir, "myexport%s.csv" % p_num)
                export_part(studs, p_num, start, end, curr_path)
            finally:
                conn.close()
        print("Result in %s" % workdir)
        t2 = time.time()
        print("Elapsed: %s secs" % (t2 - t1))
    finally:
        # Like the other commands in this module: release all databases
        # and storages we opened.
        close_dbs(closables)
[13449]223
224
def bedless_students(university_inst=None):
    """Get students with beds gone.

    This is a generator. It yields a triple `(student_id, bed_ticket,
    bed)` for each bed ticket in a student's `accommodation` container
    whose bed is orphaned, i.e. the bed is set, but it has no
    `__parent__` or its `__parent__` has no `__parent__` itself.

    `university_inst` is the university object to search. If it is
    `None` (the default), the local databases are opened (writable)
    and the university is looked up there. In that case connection and
    databases are closed again when the generator finishes, also if it
    is not consumed completely or an error occurs.
    """
    conn, closeables = None, None
    try:
        if university_inst is None:
            closeables = init_dbs(read_only=False)
            conn = closeables[0].open()
            university_inst = get_university(conn)
        students = university_inst['students']
        for stud_id in students:
            stud = students[stud_id]
            if 'accommodation' not in stud.keys():
                continue
            accomm = stud['accommodation']
            for bed_ticket_id in accomm.keys():
                ticket = accomm[bed_ticket_id]
                bed = ticket.bed
                if bed is None:
                    continue
                parent = getattr(bed, '__parent__', None)
                if getattr(parent, '__parent__', None) is None:
                    yield stud_id, ticket, bed
    finally:
        # Runs also when the generator is closed or garbage-collected
        # before it is exhausted. We only clean up what we opened
        # ourselves.
        if conn is not None:
            conn.close()
        if closeables is not None:
            close_dbs(closeables)
250
251
def remove_orphan_beds():
    """Remove references to orphaned beds from bed tickets.

    Opens the local databases (writable), sets the `bed` attribute of
    each bed ticket reported by `bedless_students()` to `None` and
    commits the transaction.

    If anything goes wrong, the transaction is aborted, so no partial
    changes are stored. Connection and databases are closed in any
    case.
    """
    closeables = init_dbs(read_only=False)
    try:
        conn = closeables[0].open()
        try:
            uni = get_university(conn)
            for stud_id, bed_ticket, _bed in bedless_students(uni):
                print("DELETE bed of student %s" % stud_id)
                bed_ticket.bed = None
            transaction.commit()
        except BaseException:
            # Also on Ctrl-C: drop pending changes before the
            # connection is closed, then let the error pass on.
            transaction.abort()
            raise
        finally:
            conn.close()
    finally:
        close_dbs(closeables)
Note: See TracBrowser for help on using the repository browser.