source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7517

Last change on this file since 7517 was 7516, checked in by uli, 13 years ago
  • Make col names a configuration setting.
  • Remove doubled gender converter.
File size: 4.7 KB
Line 
1##
2## fix_import_file.py
3## Login : <uli@pu.smp.net>
4## Started on  Wed Jan 25 17:08:30 2012 Uli Fouquet
5## $Id$
6##
7## Copyright (C) 2012 Uli Fouquet
8## This program is free software; you can redistribute it and/or modify
9## it under the terms of the GNU General Public License as published by
10## the Free Software Foundation; either version 2 of the License, or
11## (at your option) any later version.
12##
13## This program is distributed in the hope that it will be useful,
14## but WITHOUT ANY WARRANTY; without even the implied warranty of
15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16## GNU General Public License for more details.
17##
18## You should have received a copy of the GNU General Public License
19## along with this program; if not, write to the Free Software
20## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21##
22"""
23Fix exports from old SIRP portal to make them importable by current portal.
24
25Usage:
26
27Change into this directory, set the options below (files are assumed
28to be in the same directory) and then run
29
30  python fix_import_file.py
31
32Errors/warnings will be displayed on the shell, the output will be put
33into the specified output file.
34"""
35
36##
37## CONFIGURATION SECTION
38##
# File with input data. The name is passed to open() as-is, so a relative
# name is looked up in the directory the script is run from.
INPUT_FILE = 'students_for_reimport.csv'

# File written with modified output (created/overwritten by the script).
OUTPUT_FILE = 'out.csv'

# Keys are fieldnames (column names) of the input file, values are names
# of classmethods of class `Converters` (see below). Columns not listed
# here are copied to the output unchanged.
OPTIONS = {
    'sex': 'gender',
    'birthday': 'date',
    'request_date': 'datetime',
    'marit_stat': 'marit_stat',
    'entry_session': 'session',
    'current_session': 'session',
    }

# Mapping input file colnames --> output file colnames. Only the header
# row is renamed; entries whose input column does not exist in the input
# file are ignored.
COLNAME_MAPPING = {
    'jamb_reg_no': 'reg_no',
    'birthday': 'date_of_birth',
    }
61##
62## END OF CONFIG
63##
64
65import csv
66import datetime
67import sys
68
def convert_fieldnames(fieldnames):
    """Build the header row for the output file.

    Returns a dict mapping every input fieldname to the column name to
    write: the replacement from COLNAME_MAPPING where one is configured,
    the unchanged input name otherwise. COLNAME_MAPPING entries for
    columns that are not in `fieldnames` are skipped.
    """
    # Start with an identity mapping: each column keeps its own name.
    mapping = {}
    for name in fieldnames:
        mapping[name] = name
    # Overwrite only those columns that exist and have a configured rename.
    for old_name in COLNAME_MAPPING:
        if old_name in mapping:
            mapping[old_name] = COLNAME_MAPPING[old_name]
    return mapping
78
class Converters(object):
    """Converters to turn old-style values into new ones.

    Every converter is a classmethod that takes the raw cell value read
    from the input file and returns the value to write to the output
    file. A missing cell (None, as produced by csv.DictReader for short
    rows) is treated like an empty/invalid value instead of raising.
    """
    @classmethod
    def session(cls, value):
        """ '08' --> 2008

        Numbers 0..13 are expanded to 2000..2013, numbers 2000..2014 are
        kept as they are. Everything else (non-numeric, missing, negative
        or out-of-range values) yields the marker value 9999.

        Returns an int.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            # ValueError: not a number; TypeError: missing cell (None).
            return 9999
        # The lower bound keeps negative numbers from being turned into
        # years before 2000.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Any other value gives an empty string.
        """
        if value == 'True':
            return 'married'
        if value == 'False':
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'female'/'male'

        Any other value gives an empty string.
        """
        if value == 'True':
            return 'female'
        if value == 'False':
            return 'male'
        return ''

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        Missing values (None, 'None' or the empty string) give an empty
        string.
        """
        if value in (None, "None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def datetime(cls, value):
        """ None/'None' --> '', everything else is passed through.

        NOTE: despite its name this converter does not reformat the
        value; it only blanks out missing values.
        """
        if value is None or value == "None":
            return ""
        return value
151
# Main script: copy INPUT_FILE to OUTPUT_FILE, renaming the header columns
# and running the converters configured in OPTIONS over each data row.
#
# The files are opened in binary mode as the Python 2 csv module expects.
# The ``with`` blocks make sure both files are closed (and the output
# flushed) even if a converter raises half-way through.
#
# print is always called with one pre-formatted argument so that the
# output is the same whether print is a statement or a function.
with open(INPUT_FILE, 'rb') as in_file:
    reader = csv.DictReader(in_file)
    # Accessing ``fieldnames`` reads the header line (Python >= 2.6); it
    # is None if the input file is completely empty.
    fieldnames = reader.fieldnames or []

    print("FIELDS: ")
    for index, name in enumerate(fieldnames):
        print("%s %s" % (index, name))

    # Resolve the converter for each configured column once, so a broken
    # OPTIONS entry is reported a single time and not once per row.
    converters = {}
    for key, conv_name in OPTIONS.items():
        if key not in fieldnames:
            continue
        converter = getattr(Converters, conv_name, None)
        if converter is None:
            print("WARNING: cannot find converter %s" % conv_name)
            continue
        converters[key] = converter

    with open(OUTPUT_FILE, 'wb') as out_file:
        writer = csv.DictWriter(out_file, fieldnames)
        if fieldnames:
            # Write the (renamed) header even if no data rows follow.
            writer.writerow(convert_fieldnames(fieldnames))
        for row in reader:
            for key, converter in converters.items():
                row[key] = converter(row[key])
            writer.writerow(row)

print("Output written to %s" % OUTPUT_FILE)
Note: See TracBrowser for help on using the repository browser.