source: main/waeup.kofa/branches/0.2/tools/prepare_jamb_file.py @ 12701

Last change on this file since 12701 was 8546, checked in by Henrik Bettermann, 12 years ago

Replace some odd lga keys.

  • Property svn:keywords set to Id
File size: 5.4 KB
Line 
1## $Id: prepare_jamb_file.py 8546 2012-05-29 12:24:32Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19"""
20import csv
21import datetime
22import re
23import sys
24
25##
26## CONFIGURATION SECTION
27##
# Maps fieldnames of the input file to the name of the Converters
# method (see below) that is applied to the values of that field.
# Fields not listed here are copied unchanged.
OPTIONS = dict(
    sex='gender',
    session='session',
    lga='lga',
    firstname='name',
    middlename='name',
    lastname='uppername',
    container_code='container_code',
    )
39
# Column renaming: header name in the input file --> header name
# written to the output file.
COLNAME_MAPPING = dict(
    jambscore='jamb_score',
    eng_score='jamb_subjects',
    )
45
46##
47## END OF CONFIG
48##
49
# Look for the first sequence of digits: when used with match(),
# group 1 holds that sequence (or '' if the string starts a match
# without any digits). The pattern is a raw string so that the
# backslashes reach the regex engine as written instead of relying on
# Python passing unknown string escapes through unchanged.
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')
52
def convert_fieldnames(fieldnames, mapping=None):
    """Build the header row for the output file.

    Returns a dict that maps every name in `fieldnames` to the column
    title to be written for it: the replacement listed in `mapping`
    if there is one, otherwise the name itself. Entries of `mapping`
    whose key does not occur in `fieldnames` are ignored.

    `mapping` defaults to the module level COLNAME_MAPPING; passing a
    different dict allows reuse with other renaming tables.
    """
    if mapping is None:
        mapping = COLNAME_MAPPING
    return dict((name, mapping.get(name, name)) for name in fieldnames)
62
def merge_subjects(row):
    """Collapse the JAMB subject columns of `row` into 'eng_score'.

    If `row` contains the English score and all three further
    subject/score pairs, row['eng_score'] is overwritten in place with
    a string of the form
    "English: <score>, <subject>: <score>, <subject>: <score>, ...".
    If any of these keys is missing, `row` is left untouched.

    Nothing is returned; the row is modified in place.
    """
    needed = (
        'eng_score',
        'Subj2', 'Subj2Score',
        'Subj3', 'Subj3Score',
        'Subj4', 'Subj4Score',
        )
    for key in needed:
        if key not in row:
            # Incomplete subject data: do not touch the row.
            return
    # The values are picked in the order the template expects them.
    values = tuple(row[key] for key in needed)
    row['eng_score'] = "English: %s, %s: %s, %s: %s, %s: %s" % values
78
class Converters(object):
    """Converters to turn old-style values into new ones.

    Every converter is a classmethod that takes the raw value of one
    CSV field and returns the value to be written instead. A value of
    None (which csv.DictReader supplies for fields missing from a
    short row) never raises; it is turned into the converter's
    'empty' result.
    """

    # lga keys (already lowercased, apostrophes removed) that have to
    # be replaced by a different key.
    _LGA_REPLACEMENTS = {
        'akwa_ibom_uru_offong_oruko': 'akwa_ibom_urue-offong-oruko',
        'edo_akoko_edo': 'edo_akoko-edo',
        'edo_owan_east': 'edo_owan-east',
        'kogi_mopa-muro-mopi': 'kogi_mopa-muro',
        'foreign': 'foreigner',
        }

    # Range of accepted session years (both inclusive) and the value
    # returned for everything that is not a session in that range.
    _FIRST_SESSION = 2000
    _LAST_SESSION = 2014
    _INVALID_SESSION = 9999

    @classmethod
    def name(cls, value):
        """ 'JOHN' --> 'John'

        None --> ''
        """
        if value is None:
            return ''
        return value.capitalize()

    @classmethod
    def uppername(cls, value):
        """ 'John' --> 'JOHN'

        None --> ''
        """
        if value is None:
            return ''
        return value.upper()

    @classmethod
    def container_code(cls, value):
        """ Return constant string, whatever `value` is.
        """
        return 'putme2012'

    @classmethod
    def lga(cls, value):
        """ Normalize an lga key and replace some odd keys.

        "Edo_Akoko_Edo" --> 'edo_akoko-edo'
        "Foreign" --> 'foreigner'

        Apostrophes are removed and the key is lowercased *before* the
        replacement table is consulted, so that differently spelled
        variants of an odd key are replaced as well. Values that are
        not strings (e.g. None) give ''.
        """
        try:
            key = value.replace("'", "").lower()
        except AttributeError:
            # Not a string: there is no key to normalize.
            return ''
        return cls._LGA_REPLACEMENTS.get(key, key)

    @classmethod
    def session(cls, value):
        """ '08' --> 2008
        '2008/2009' --> 2008

        Returns the session year as an int. Only the part before the
        first '/' is looked at. Numbers from 0 to 99 are taken as
        years of the 21st century. 9999 is returned if the value is
        not a number or if the resulting year is not in the range
        2000-2014.
        """
        try:
            number = int(str(value).split('/')[0])
        except ValueError:
            return cls._INVALID_SESSION
        if 0 <= number < 100:
            # Two-digit year. Negative numbers are deliberately not
            # expanded, they can never be a valid session.
            number += 2000
        if cls._FIRST_SESSION <= number <= cls._LAST_SESSION:
            return number
        return cls._INVALID_SESSION

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'
            'F'/'M' --> 'f'/'m'

        Anything else gives ''.
        """
        if value in ('True', 'f', 'F'):
            return 'f'
        if value in ('False', 'm', 'M'):
            return 'm'
        return ''
159
160
def main():
    """Convert the CSV file named on the command line.

    Reads the file given as sys.argv[1] and writes a converted copy to
    '<input path without its extension>_edited.csv':

    - the header row is renamed by convert_fieldnames(),
    - the subject columns of each row are merged by merge_subjects(),
    - every field listed in OPTIONS is passed through the Converters
      method registered for it.

    If writing a row fails, the row's 'reg_number' is printed and
    processing continues with the next row.
    """
    # Local import: os is not imported at module level.
    import os

    input_file = '%s' % sys.argv[1]
    # splitext() strips only the extension of the last path component,
    # so paths like './data/file.csv' or 'dir.v2/file.csv' keep their
    # directory part. (Splitting at the first '.' would not.)
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    # Binary mode is what the Python 2 csv module expects. The 'with'
    # blocks make sure both files are flushed and closed.
    with open(input_file, 'rb') as infile:
        with open(output_file, 'wb') as outfile:
            reader = csv.DictReader(infile)
            # fieldnames is None for a completely empty input file.
            fieldnames = reader.fieldnames or []
            writer = csv.DictWriter(outfile, fieldnames)

            print("FIELDS: ")
            for index, fieldname in enumerate(fieldnames):
                print("%s %s" % (index, fieldname))
            writer.writerow(convert_fieldnames(fieldnames))

            for row in reader:
                merge_subjects(row)
                for key, conv_name in OPTIONS.items():
                    if key not in row:
                        continue
                    converter = getattr(Converters, conv_name, None)
                    if converter is None:
                        print("WARNING: cannot find converter %s" % conv_name)
                        continue
                    row[key] = converter(row[key])
                try:
                    writer.writerow(row)
                except Exception:
                    # Best effort: report the offending row and go on.
                    # get() keeps the report itself from failing if
                    # the file has no 'reg_number' column.
                    print(row.get('reg_number'))

    print("Output written to %s" % output_file)
191
192
if __name__ == '__main__':
    # Exactly one command line argument is expected: the input file.
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)
Note: See TracBrowser for help on using the repository browser.