source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7532

Last change on this file since 7532 was 7530, checked in by Henrik Bettermann, 13 years ago

Rename study_course column.

  • Property svn:keywords set to Id
File size: 5.6 KB
RevLine 
[7512]1## $Id: fix_import_file.py 7530 2012-01-28 08:38:19Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[7518]19Fix exports from old SRP portal to make them importable by current portal.
[7512]20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
[7518]26  python fix_import_file.py <filename>
[7512]27
Errors/warnings will be displayed on the shell, the output will be
written to '<filename without extension>_edited.csv'.
30"""
31
import os
import sys
33
34
if len(sys.argv) != 2:
    print('Usage: python fix_import_file.py <filename>')
    sys.exit(1)

##
## CONFIGURATION SECTION
##
# file with input data
INPUT_FILE = sys.argv[1]

# file written with modified output: the input path with its extension
# replaced by '_edited.csv'. os.path.splitext() strips only the final
# extension, so dots in directory names (e.g. './students.csv') or
# elsewhere in the filename are kept intact.
OUTPUT_FILE = '%s_edited.csv' % os.path.splitext(INPUT_FILE)[0]
[7512]47
# Which converter to apply to which input column: each key is a
# fieldname of the input file, each value the name of a method of
# class Converters (see below).
OPTIONS = {
    'birthday': 'date',
    'current_session': 'session',
    'entry_session': 'session',
    'marit_stat': 'marit_stat',
    'password': 'password',
    'reg_state': 'reg_state',
    'request_date': 'datetime',
    'sex': 'gender',
}
[7516]60
# Column renames: input file colname --> output file colname.
# Columns not listed here keep their name.
COLNAME_MAPPING = {
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',
    'jamb_reg_no': 'reg_number',
    'study_course': 'certificate',
}
[7526]68
# Registration states: input reg_state value --> output reg_state value.
REGSTATE_MAPPING = {
    'admitted': 'admitted',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
    'returning': 'returning',
    'school_fee_paid': 'school fee paid',
    'student_created': 'created',
}
81
[7512]82##
83## END OF CONFIG
84##
85
86import csv
87import datetime
88import sys
89
def convert_fieldnames(fieldnames):
    """Build the header row for the output file.

    Returns a dict keyed by the input fieldnames. Each value is the
    replacement name from COLNAME_MAPPING or, where the mapping has no
    entry for a fieldname, that fieldname itself.
    """
    return dict(
        (name, COLNAME_MAPPING.get(name, name)) for name in fieldnames)
99
class Converters(object):
    """Converters to turn old-style values into new ones.

    Every converter is a classmethod that takes the raw cell value read
    from the input file and returns the value to write to the output
    file. A cell value may be None: csv.DictReader fills the missing
    fields of short rows with None.
    """

    @classmethod
    def reg_state(cls, value):
        """Map an old registration state to its new name.

        'courses_validated' --> 'courses validated'

        Values without an entry in REGSTATE_MAPPING are returned
        unchanged.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def session(cls, value):
        """Expand a session year to four digits.

        '08' --> 2008, '2008' --> 2008

        Returns an int. Two-digit years 0..13 are moved into the 2000s,
        years 2000..2014 are kept, and everything else (including
        non-numeric, missing and negative values) yields the
        placeholder 9999.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            # TypeError: value is None; ValueError: not a number.
            return 9999
        # The lower bound keeps negative input from turning into a
        # year before 2000.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Any other value yields ''.
        """
        if value == 'True':
            return 'married'
        if value == 'False':
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'

        Any other value yields ''.
        """
        if value == 'True':
            return 'f'
        if value == 'False':
            return 'm'
        return ''

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        Missing values (None, '' or the string 'None') yield ''.
        """
        if value is None or value in ("", "None"):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def datetime(cls, value):
        """Blank out missing datetimes; pass everything else through.

        None and the string 'None' yield ''. Any other value is returned
        unchanged -- no reformatting of the date is done here.
        """
        if value is None or value == "None":
            return ""
        return value

    @classmethod
    def password(cls, value):
        """ 'not set' --> ''

        Any other value is returned unchanged.
        """
        if value == "not set":
            return ""
        return value
184
185
# Read INPUT_FILE, rename its columns via convert_fieldnames(), run the
# converters configured in OPTIONS over every row and write the result
# to OUTPUT_FILE. Both files are closed when the 'with' blocks exit.
#
# NOTE: the files are opened in binary mode ('rb'/'wb') because that is
# what the csv module of Python 2 expects.
with open(INPUT_FILE, 'rb') as infile:
    reader = csv.DictReader(infile)
    fieldnames = reader.fieldnames
    if not fieldnames:
        # Without a header row there is nothing to convert; stop before
        # an output file is created.
        print("WARNING: no header row found in %s" % INPUT_FILE)
        sys.exit(1)

    print("FIELDS: ")
    for index, name in enumerate(fieldnames):
        print("%s %s" % (index, name))

    # Look up the converter of every configured column once, instead of
    # once per cell. Only columns present in the input are considered,
    # so a missing converter is reported once per affected column.
    converters = {}
    for key in fieldnames:
        conv_name = OPTIONS.get(key)
        if conv_name is None:
            continue
        converter = getattr(Converters, conv_name, None)
        if converter is None:
            print("WARNING: cannot find converter %s" % conv_name)
            continue
        converters[key] = converter

    with open(OUTPUT_FILE, 'wb') as outfile:
        writer = csv.DictWriter(outfile, fieldnames)
        # The header is written even if the input has no data rows, so
        # the message printed at the end is always true.
        writer.writerow(convert_fieldnames(fieldnames))
        for row in reader:
            for key, converter in converters.items():
                row[key] = converter(row[key])
            writer.writerow(row)

print("Output written to %s" % OUTPUT_FILE)
Note: See TracBrowser for help on using the repository browser.