source: main/waeup.uniben/trunk/src/waeup/uniben/scripts.py @ 13329

Last change on this file since 13329 was 13210, checked in by uli, 9 years ago

Add a rudimentary argument parser.

At least we can call --help now.

File size: 5.7 KB
Line 
1"""The export() function herein is called by the kofaexport script.
2
3To make this work, you have to pip install psutil in your local virtualenv.
4"""
5import argparse
6import gc
7import grok
8import os
9import tempfile
10import time
11from ZODB import DB, DemoStorage, FileStorage
12from ZODB.blob import BlobStorage
13from zope.component import getUtility
14from waeup.kofa.interfaces import ICSVExporter
15
16
#: Registry of package names that were already grokked (name -> True).
grokked = {}


#: Packages we must 'grok' so that exporters (and everything else)
#: get registered.
#: Order usually matters here.
TO_BE_GROKKED = (
    "waeup.kofa",
    "kofacustom.nigeria",
    "waeup.uniben",
)

#: Directory containing this module.
_PACKAGE_DIR = os.path.dirname(__file__)

#: Three levels above the package directory.
_INSTANCE_DIR = os.path.dirname(
    os.path.dirname(os.path.dirname(_PACKAGE_DIR)))

#: The data root where we look for Data.fs, etc. This is normally the
#: `var/` dir of an instance.
VAR_DIR = os.path.join(_INSTANCE_DIR, 'var')

#: Name of the `University` instance in which export items are looked
#: up.
APP_NAME = "uniben"

#: Name under which the exporter we use is registered.
EXPORTER_NAME = "students"
37
38
def handle_options():
    """Parse the commandline and return the resulting namespace.

    No options of our own are defined yet; `argparse` still gives us
    ``--help`` and rejects unknown arguments.
    """
    cli = argparse.ArgumentParser(description="Export WAeUP kofa data")
    return cli.parse_args()
46
47
def grok_bases():
    """Grok every package from `TO_BE_GROKKED` that was not grokked yet.

    Packages are handled in the order given by `TO_BE_GROKKED`. A
    package is recorded in the module-level `grokked` dict right
    before it is grokked, so a later call skips it.
    """
    for package in TO_BE_GROKKED:
        if not grokked.get(package, False):
            print("Grokking %s..." % package)
            # Mark first, grok afterwards (same order as ever).
            grokked[package] = True
            grok.testing.grok(package)
            print("Done.")
58
59
def init_dbs():
    """Open the databases of the instance (read-only file storages).

    Returns a tuple of `closables`: objects providing ``close()``,
    which can be handed over to close_dbs() when you are done.

    The first element of the tuple is the main database. Call its
    ``open()`` to get a fresh connection to the ZODB. The remaining
    elements are the async database, the base file storage, the blob
    storage and the async file storage (in that order).

    As a side effect all required packages are grokked.
    """
    # https://github.com/zopefoundation/ZODB/\
    #         blob/master/src/ZODB/cross-database-references.txt
    # Both DBs share this mapping, so they know about each other.
    registry = {}
    filestorage_dir = os.path.join(VAR_DIR, 'filestorage')

    # The async database comes first...
    async_storage = FileStorage.FileStorage(
        os.path.join(filestorage_dir, 'Data.async.fs'), read_only=True)
    async_db = DB(async_storage, database_name="async", databases=registry)

    # ...then the main database, wrapped for blob support.
    base_storage = FileStorage.FileStorage(
        os.path.join(filestorage_dir, 'Data.fs'), read_only=True)
    blob_storage = BlobStorage(
        os.path.join(VAR_DIR, 'blobstorage'), base_storage)
    main_db = DB(blob_storage, databases=registry)
    main_db.open()

    grok_bases()
    return (main_db, async_db, base_storage, blob_storage, async_storage)
84
85
def close_dbs(closables):
    """Call ``close()`` on each item of `closables`, in the given order.

    Progress is reported on stdout for every item.
    """
    for closable in closables:
        print("Closing %s..." % closable)
        closable.close()
        print("Done.")
93
94
def get_university(conn):
    """Get the `University` named `APP_NAME` from connection `conn`.

    The lookup path is ``root -> "Application" -> APP_NAME``.
    """
    root = conn.root()
    application = root["Application"]
    return application[APP_NAME]
97
98
def get_all_students(container, cnt_from=0, cnt_to=0):
    """Yield the values of `container` with index `cnt_from`..`cnt_to`.

    Indexes are zero-based and refer to the iteration order of the
    keys of `container`; both bounds are inclusive. A false `cnt_to`
    (the default ``0``) means: no upper limit.

    Values are fetched lazily, one ``container.get(key)`` per
    yielded element.
    """
    for index, key in enumerate(container):
        if index < cnt_from:
            # not yet in the requested range
            continue
        if cnt_to and index > cnt_to:
            # beyond the requested range
            return
        yield container.get(key)
111
112
def partition(container, part_size=10000):
    """Compute chunk boundaries for `container`.

    Returns a list of triples (<num>, <index_start>, <index_end>), one
    for each chunk. <num> is the running number of the chunk, the two
    indexes are zero-based and inclusive.

    `container` must support `len()`. Each chunk covers `part_size`
    elements; only the last one may be smaller.

    Example: 250 elements and a `part_size` of 100 result in:

      [(0,   0,  99),
       (1, 100, 199),
       (2, 200, 249),
       ]

    The number of elements found is printed to stdout.
    """
    total = len(container)
    print("Container elements: %s" % total)
    chunks = []
    for first in range(0, total, part_size):
        # the last chunk ends with the last element of the container
        last = min(first + part_size, total) - 1
        chunks.append((len(chunks), first, last))
    return chunks
137
138
def get_mem_info():
    """Get the resident set size (RSS) of the current process.

    This works only, if `psutil` is installed locally (in virtualenv).
    Otherwise we return `None`.
    """
    try:
        # late import. We do not want to make it a waeup.uniben dependency.
        import psutil
    except ImportError:
        return None
    proc = psutil.Process(os.getpid())
    # psutil 2.0 renamed `get_memory_info()` to `memory_info()` and
    # psutil 3.0 removed the old name. Prefer the new one and fall back
    # to the old one for ancient installations.
    mem_info = getattr(proc, 'memory_info', None)
    if mem_info is None:
        mem_info = proc.get_memory_info
    return mem_info().rss
152
153
def export_part(container, part_num, start, end, path):
    """Export chunk number `part_num` of `container` into file `path`.

    `start` and `end` are the zero-based indexes of the first and the
    last element to export. `path` is the filesystem path of the file
    to create.

    The exporter is looked up as the `ICSVExporter` utility named
    `EXPORTER_NAME`. Memory usage (if available) and time needed are
    reported on stdout.
    """
    # Collect garbage first, so the memory reading is less noisy.
    gc.collect()
    mem_before = get_mem_info()
    print("  Export %s-%s to %s (mem: %s)" % (start, end, path, mem_before))
    started = time.time()
    students = get_all_students(container, start, end)
    getUtility(ICSVExporter, name=EXPORTER_NAME).export(students, path)
    elapsed = time.time() - started
    mem_after = get_mem_info()
    print("  Done (%s secs, mem: %s)" % (elapsed, mem_after))
170
171
def export():
    """Main function.

    Export all students of the university `APP_NAME` as CSV files into
    a freshly created temporary directory. The students container is
    split into chunks (see `partition()`); each chunk is written to a
    file ``myexport<num>.csv`` using a connection of its own.

    The result directory and the time needed are printed to stdout.
    All databases opened are closed again before we return, also if
    the export fails.
    """
    # No options are evaluated yet, but this gives us `--help`.
    handle_options()
    closables = init_dbs()
    try:
        main_db = closables[0]

        # A first, short-lived connection is used only to compute the
        # chunks to export.
        conn = main_db.open()
        try:
            parts = partition(get_university(conn)['students'])
        finally:
            conn.close()  # every loop will reopen the connection

        workdir = tempfile.mkdtemp()

        t1 = time.time()
        for p_num, start, end in parts:
            # A fresh connection per chunk, closed right after the
            # chunk was exported.
            conn = main_db.open()
            try:
                studs = get_university(conn)['students']
                curr_path = os.path.join(workdir, "myexport%s.csv" % p_num)
                export_part(studs, p_num, start, end, curr_path)
            finally:
                conn.close()
        print("Result in %s" % workdir)
        t2 = time.time()
        print("Elapsed: %s secs" % (t2 - t1))
    finally:
        close_dbs(closables)
Note: See TracBrowser for help on using the repository browser.