source: main/waeup.kofa/trunk/tools/prepare_jamb_file.py @ 8542

Last change on this file since 8542 was 8541, checked in by Henrik Bettermann, 13 years ago

Converter script for JAMB import files

  • Property svn:keywords set to Id
File size: 5.1 KB
Line 
1## $Id: prepare_jamb_file.py 8541 2012-05-29 07:10:22Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
"""Converter script for JAMB import files.

Usage: ``python prepare_jamb_file.py <filename>``

Reads the CSV file given on the command line, merges the JAMB subject
columns into one field, runs the converters configured in ``OPTIONS``
over the matching columns, renames header columns according to
``COLNAME_MAPPING`` and writes the result to ``<name>_edited.csv``.
"""
20import csv
21import datetime
22import re
23import sys
24
25##
26## CONFIGURATION SECTION
27##
# Keys are fieldnames in the input file, values are names of methods of
# class Converters (see below).  main() looks each method up by name and
# applies it to every value of that column; columns that are not listed
# here are written out unchanged.
OPTIONS = {
    'sex': 'gender',
    'session': 'session',
    'lga': 'lga',
    'firstname': 'name',
    'middlename': 'name',
    'lastname': 'uppername',
    'container_code': 'container_code',
    }
39
# Mapping input file colnames --> output file colnames.
# Only the header row is renamed (see convert_fieldnames); columns that
# are not listed here keep their input name.
COLNAME_MAPPING = {
    'jambscore': 'jamb_score',
    # merge_subjects() stores the combined subject string in 'eng_score',
    # hence the column is renamed to 'jamb_subjects' on output.
    'eng_score': 'jamb_subjects',
    }
45
46##
47## END OF CONFIG
48##
49
# Look for the first sequence of digits: leading non-digits are skipped
# and the digit run that follows is captured as group 1.
# A raw string is used so that ``\d`` reaches the regex engine verbatim
# instead of being treated as an (invalid) string escape sequence.
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')
52
def convert_fieldnames(fieldnames):
    """Build the header row for the output file.

    Returns a dict that maps every name in ``fieldnames`` to the column
    name to be written: the replacement from COLNAME_MAPPING if there is
    one, otherwise the input name itself.
    """
    renamed = {}
    for column in fieldnames:
        # Fall back to the original name for columns without a mapping.
        renamed[column] = COLNAME_MAPPING.get(column, column)
    return renamed
62
def merge_subjects(row):
    """Merge jamb subjects and scores into the 'eng_score' field.

    ``row`` is modified in place; nothing is returned.  The merge only
    happens when all seven subject/score columns are present in the row,
    otherwise the row is left untouched.
    """
    # Listed in the order in which the values appear in the merged string.
    required = (
        'eng_score',
        'Subj2', 'Subj2Score',
        'Subj3', 'Subj3Score',
        'Subj4', 'Subj4Score',
        )
    for column in required:
        if column not in row:
            return
    values = tuple(row[column] for column in required)
    row['eng_score'] = "English: %s, %s: %s, %s: %s, %s: %s" % values
78
class Converters(object):
    """Converters to turn old-style values into new ones.

    Every converter is a classmethod that takes the raw cell value and
    returns the converted value.  The methods are looked up by name, so
    they can be referenced as strings from the OPTIONS mapping.
    """

    @classmethod
    def name(cls, value):
        """ 'JOHN -> John'
        """
        return value.capitalize()

    @classmethod
    def uppername(cls, value):
        """ 'John -> JOHN'
        """
        return value.upper()

    @classmethod
    def container_code(cls, value):
        """ Return constant string, whatever ``value`` is.
        """
        return 'putme2012'

    @classmethod
    def lga(cls, value):
        """ Strip apostrophes and lowercase the value.

        One known misspelling is replaced by a fixed string.  Values
        that are not strings (e.g. None) are turned into ''.
        """
        if value == 'akwa_ibom_uru_offong_oruko':
            return 'akwa_ibom_urue-offong-oruko'
        try:
            return value.replace("'", "").lower()
        except (AttributeError, TypeError):
            # Not a string: nothing sensible to normalize.
            return ''

    @classmethod
    def session(cls, value):
        """ '08' --> 2008
        '2008/2009' --> 2008

        Returns an int.  Two-digit values from 0 to 13 are taken as
        years 2000 to 2013; four-digit values must lie between 2000 and
        2014.  For 'a/b' values only the part before the first slash is
        considered and it must already be a four-digit year.  Everything
        that cannot be interpreted yields the marker value 9999.
        """
        if '/' in value:
            try:
                number = int(value.split('/')[0])
            except ValueError:
                # e.g. 'N/A': treat like any other unparsable value
                # instead of aborting the whole conversion run.
                return 9999
            if 2000 <= number < 2015:
                return number
            return 9999
        try:
            number = int(value)
        except ValueError:
            return 9999
        # The lower bound keeps negative numbers from becoming 19xx.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'
            'F'/'M' --> 'f'/'m'

        Any other value is turned into ''.
        """
        if value in ('True', 'f', 'F'):
            return 'f'
        if value in ('False', 'm', 'M'):
            return 'm'
        return ''
151
152
def main():
    """Convert the CSV file named by ``sys.argv[1]``.

    The result is written next to the input file as
    ``<input path without extension>_edited.csv``.  For every data row
    the subject columns are merged (merge_subjects) and each column
    listed in OPTIONS is passed through the Converters method configured
    for it.  The header row is renamed via convert_fieldnames.  Rows
    that cannot be written are skipped and their 'reg_number' is
    printed.
    """
    import os  # only needed here, for building the output file name

    input_file = sys.argv[1]
    # splitext() strips only the trailing extension.  Splitting at the
    # first dot would mangle paths like './data/in.csv'.
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    # Binary modes are what the Python 2 csv module expects.
    with open(input_file, 'rb') as infile:
        reader = csv.DictReader(infile)
        # fieldnames is None for a completely empty input file
        fieldnames = reader.fieldnames or []
        with open(output_file, 'wb') as outfile:
            writer = csv.DictWriter(outfile, fieldnames)
            print("FIELDS: ")
            for x, y in enumerate(fieldnames):
                print("%s %s" % (x, y))
            if fieldnames:
                writer.writerow(convert_fieldnames(fieldnames))

            for row in reader:
                merge_subjects(row)
                for key in list(row):
                    if key not in OPTIONS:
                        continue
                    conv_name = OPTIONS[key]
                    converter = getattr(Converters, conv_name, None)
                    if converter is None:
                        print("WARNING: cannot find converter %s" % conv_name)
                        continue
                    row[key] = converter(row[key])
                try:
                    writer.writerow(row)
                except Exception:
                    # Best effort: report the offending record and go on
                    # with the next row.  get() keeps the report itself
                    # from failing when there is no 'reg_number' column.
                    print(row.get('reg_number'))

    print("Output written to %s" % output_file)
183
184
if __name__ == '__main__':
    # Exactly one command line argument is expected: the input file.
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)
Note: See TracBrowser for help on using the repository browser.