source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9400

Last change on this file since 9400 was 9362, checked in by uli, 12 years ago

Update tools to allow new ids in imports. Support for new import col 'old_id'.

  • Property svn:keywords set to Id
File size: 15.1 KB
Line 
1## $Id: fix_import_file.py 9362 2012-10-19 22:59:41Z uli $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import os
39import re
40import sys
41
42try:
43    from lgas import LGAS
44except:
45    print 'ERROR: lgas.py is missing.'
46    sys.exit(1)
47
48def strip(string):
49    string = string.replace('_', '')
50    string = string.replace('/', '')
51    string = string.replace('-', '')
52    string = string.replace(' ', '')
53    string = string.lower()
54    return string
55
56LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
57LGAS_dict = dict(LGAS)
58
59##
60## CONFIGURATION SECTION
61##
62# keys are fieldnames in input file, values are methods of class
63# Converter (see below)
64OPTIONS = {
65    'student_id': 'student_id',
66    'sex': 'gender',
67    'birthday': 'date',
68    'marit_stat': 'marit_stat',
69    'session': 'session',
70    'entry_session': 'session',
71    'current_session': 'session',
72    'session_id': 'session',
73    'entry_mode': 'mode',
74    'reg_state': 'reg_state',
75    'password': 'password',
76    'phone': 'phone',
77    'nationality': 'nationality',
78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
85    'lga': 'lga',
86    'order_id': 'no_int',
87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
99    'email': 'email',
100    'fst_sit_type': 'sittype',
101    'scd_sit_type': 'sittype',
102    'resp_pay_reference': 'no_int',
103    'type': 'company',
104    'date': 'date',
105    'core_or_elective': 'bool',
106    'category': 'p_category',
107    'reg_transition': 'reg_state',  # we completely change this column,
108                                    # since reg_state import is usually intended
109    'transition': 'reg_transition',
110    'payment_date': 'date',
111    'validation_date': 'date',
112    }
113
114# Mapping input file colnames --> output file colnames
115COLNAME_MAPPING = {
116    # base data
117    'id': 'student_id',
118    'reg_state': 'state',
119    'reg_transition': 'state',
120    'jamb_reg_no': 'reg_number',
121    'matric_no': 'matric_number',
122    'birthday': 'date_of_birth',
123    'clr_ac_pin': 'clr_code',
124    # clearance
125    'hq_grade': 'hq_degree',
126    'uniben': 'former_matric',
127    'hq_type2': 'hq2_type',
128    'hq_grade2': 'hq2_degree',
129    'hq_school2': 'hq2_school',
130    'hq_matric_no2': 'hq2_matric_no',
131    'hq_session2': 'hq2_session',
132    'hq_disc2': 'hq2_disc',
133    'emp': 'employer',
134    'emp2': 'employer2',
135    'emp_position2': 'emp2_position',
136    'emp_start2': 'emp2_start',
137    'emp_end2': 'emp2_end',
138    'emp_reason2': 'emp2_reason',
139    # study course
140    'study_course': 'certificate',
141    # study level
142    'session': 'level_session',
143    'verdict': 'level_verdict',
144    # course ticket
145    'level_id': 'level',
146    'core_or_elective': 'mandatory',
147    # payment ticket
148    'order_id': 'p_id',
149    'status': 'p_state',
150    'category': 'p_category',
151    'resp_pay_reference': 'r_pay_reference',
152    'resp_desc': 'r_desc',
153    'resp_approved_amount': 'r_amount_approved',
154    'item': 'p_item',
155    'amount': 'amount_auth',
156    'resp_card_num': 'r_card_num',
157    'resp_code': 'r_code',
158    'date': 'creation_date',
159    'surcharge': 'surcharge_1',
160    'session_id': 'p_session',
161    'type': 'r_company',
162    'old_id': 'old_id',
163    }
164
165# Mapping input state --> output state
166REGSTATE_MAPPING = {
167    'student_created': 'created',
168    'admitted': 'admitted',
169    'objection_raised': 'clearance started',
170    'clearance_pin_entered': 'clearance started',
171    'clearance_requested': 'clearance requested',
172    'cleared_and_validated': 'cleared',
173    'school_fee_paid': 'school fee paid',
174    'returning': 'returning',
175    'courses_registered': 'courses registered',
176    'courses_validated': 'courses validated',
177    'admit': 'admitted',
178    'return': 'returning'
179    }
180
181# Mapping of special cases, where new id is not deductible from old id
182# Set to `None`, if no such special cases should be considered.
183ID_MAP_CSV = None
184ID_MAP_CSV = "id_mapping.csv"
185
186##
187## END OF CONFIG
188##
189
190# Look for the first sequence of numbers
191RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
192
193def get_id_mapping():
194    """Returns a dict mapping from old (SRP) ids to new ids.
195
196    The dict is read from ID_MAP_CSV file. If this var is set to
197    ``None`` an empty dict is returned. The ID_MAP_CSV contains only
198    the student ids of those students, for which the standard method
199    (new_id=CHAR+old_id) does not work.
200    """
201    if ID_MAP_CSV is None:
202        return {}
203    if not os.path.isfile(ID_MAP_CSV):
204        raise IOError(
205            "No such file for mapping old to new ids: %s" % ID_MAP_CSV)
206    result = dict()
207    reader = csv.DictReader(open(ID_MAP_CSV, 'rb'))
208    for row in reader:
209        result[row['student_id']] = row['new_id']
210    return result
211
212
213def convert_fieldnames(fieldnames):
214    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
215    """
216    # Remove whitespaces
217    header = dict([(name, name.strip()) for name in fieldnames])
218    for in_name, out_name in COLNAME_MAPPING.items():
219        if in_name not in header.values():
220            continue
221        # Inverse dictionary lookup
222        key = [key for key,value in header.items() if value==in_name][0]
223        header[key] = out_name
224    return header
225
226class Converters():
227    """Converters to turn old-style values into new ones.
228    """
229
230    old_new_id_map = get_id_mapping()
231
232    @classmethod
233    def student_id(cls, value, row):
234        """ 'A123456' --> 'EA123456'
235        """
236        value = cls.old_new_id_map.get(value, value)
237        if len(value) == 7:
238            return 'M' + value
239        return value
240
241    @classmethod
242    def reg_state(self, value, row):
243        """ 'courses_validated' --> 'courses validated'
244        """
245        return REGSTATE_MAPPING.get(value,value)
246
247    @classmethod
248    def reg_transition(self, value, row):
249        if value == "admitted":
250            return "admit"
251        if value == "returning":
252            return "return"
253        return value
254
255    @classmethod
256    def level(self, value, row):
257        """ '000' --> '10'
258        '800' --> '999' if pg student
259        """
260        try:
261            number = int(value)
262        except ValueError:
263            return 9999
264        if number == 0:
265            return 10
266        if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
267            return 999
268        return number
269
270    @classmethod
271    def semester(self, value, row):
272        """ '0' --> '9'
273        """
274        try:
275            number = int(value)
276        except ValueError:
277            return 9999
278        if number == 0:
279            return 9
280        return number
281
282    @classmethod
283    def application_category(self, value, row):
284        """ '' --> 'no'
285        """
286        if value == '':
287            return 'no'
288        return value
289
290    @classmethod
291    def lga(self, value, row):
292        """ Remove apostrophe
293        """
294        if value == 'akwa_ibom_uru_offong_oruko':
295            return 'akwa_ibom_urue-offong-oruko'
296        if value == 'edo_ohionmwon':
297            return 'edo_orhionmwon'
298
299        if value == 'nassarawa_nassarawa':
300            return 'nassarawa_nassawara'
301
302        if value == 'kogi_mopa-muro-mopi':
303            return 'kogi_mopa-muro'
304
305        if value == 'delta_osimili-north':
306            return 'delta_oshielli-north'
307
308        if value == 'delta_osimili':
309            return 'delta_oshimili'
310
311        if value == 'delta_osimili-south':
312            return 'delta_oshimili-south'
313        try:
314            value = value.replace("'","")
315        except:
316            return ''
317        lower = value.lower()
318        if lower in LGAS_dict.keys():
319            return lower
320        # If real names are given, let's see if a similar value
321        # in LGAS exist.
322        value = LGAS_inverted_stripped.get(strip(lower), value)
323        return value
324
325
326    @classmethod
327    def session(self, value, row):
328        """ '08' --> '2008'
329        '2008/2009' --> '2008'
330        """
331        if '/' in value:
332            numbers = value.split('/')
333            number = int(numbers[0])
334            if number in range(2000,2015):
335                return number
336            else:
337                return 9999
338        try:
339            number = int(value)
340        except ValueError:
341            return 9999
342        if number < 14:
343            return number + 2000
344        elif number in range(2000,2015):
345            return number
346        else:
347            return 9999
348
349    @classmethod
350    def former(self, value, row):
351        """ True --> yes
352        '2008/2009' --> '2008'
353        """
354        if value == 'True':
355            return 'yes'
356        return
357
358    @classmethod
359    def bool(self, value, row):
360        """ True --> 1
361        """
362        if value in ('TRUE', 'True'):
363            return '1'
364        elif value in ('FALSE', 'False'):
365            return '0'
366        return
367
368    @classmethod
369    def year(self, value, row):
370        """ '0' --> ''
371        """
372        if value == '0':
373            return
374        if value == 'None':
375            return
376        return value
377
378
379    @classmethod
380    def marit_stat(self, value, row):
381        """ 'True'/'False' --> 'married'/'unmarried'
382        """
383        if value in ('True','married'):
384            value = 'married'
385        elif value in ('False','unmarried'):
386            value = 'unmarried'
387        else:
388            value = ''
389        return value
390
391    @classmethod
392    def gender(self, value, row):
393        """ 'True'/'False' --> 'f'/'m'
394        """
395        if value.strip() in ('F', 'True','f'):
396            value = 'f'
397        elif value.strip() in ('M', 'False','m'):
398            value = 'm'
399        else:
400            value = ''
401        return value
402
403    @classmethod
404    def date(self, value, row):
405        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
406        """
407        if value == "None":
408            value = ""
409        elif value == "":
410            value = ""
411        else:
412            value = value.replace('/', '-')
413            # We add the hash symbol to avoid automatic date transformation
414            # in Excel and Calc for further processing
415            value += '#'
416        return value
417
418    @classmethod
419    def no_int(self, value, row):
420        """ Add hash and skip numbers starting with 999999
421        """
422        # We add the hash symbol to avoid automatic number transformation
423        # in Excel and Calc for further processing
424        try:
425            intvalue = int(value)
426            value += '#'
427        except:
428            pass
429        if value.startswith('999999'):
430            return
431        return value
432
433    @classmethod
434    def mode(self, value, row):
435        if value == "transfer_fulltime":
436            return "transfer_ft"
437        if value == "ume_ft":
438            return "utme_ft"
439        return value
440
441    @classmethod
442    def password(self, value, row):
443        if value == "not set":
444            return ""
445        return value
446
447    @classmethod
448    def nationality(self, value, row):
449        if value in ('nigeria', 'Nigeria'):
450            return "NG"
451        if value in ('niger', 'Niger'):
452            return "NE"
453        return value
454
455    @classmethod
456    def sittype(self, value, row):
457        if value == "nabtec":
458            return "nabteb"
459        return value
460
461    @classmethod
462    def company(self, value, row):
463        if value == "online":
464            return "interswitch"
465        return value
466
467    @classmethod
468    def p_category(self, value, row):
469        if value == "acceptance":
470            return "clearance"
471        return value
472
473    @classmethod
474    def email(self, value, row):
475        return value.strip()
476
477    @classmethod
478    def phone(self, value, row):
479        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
480
481        Dashes and slashes are removed before looking for sequences
482        of numbers.
483        """
484        if not value:
485            return
486        value = value.strip('#')
487        value = value.replace('-', '')
488        value = value.replace('/', '')
489        match = RE_PHONE.match(value)
490        phone = match.groups()[0]
491        if value.startswith('234'):
492            value = '+' + value[:3] + '-' + value[3:]
493        else:
494            value = '-%s' % phone
495        return value + '#'
496
497    @classmethod
498    def result(self, value, row):
499        try:
500            liste = eval(value)
501        except:
502            return
503        if isinstance(liste,list):
504            return [(i[0].lower(),i[1]) for i in liste]
505        return
506
507
508def main():
509    input_file = '%s' % sys.argv[1]
510    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
511    reader = csv.DictReader(open(input_file, 'rb'))
512    writer = None
513
514    for num, row in enumerate(reader):
515        if num == 0:
516            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
517            print "FIELDS: "
518            for x, y in enumerate(reader.fieldnames):
519                print x, y
520            header = convert_fieldnames(reader.fieldnames)
521            writer.writerow(header)
522        if row.get('reg_state') == 'student_created':
523            # We do not reimport student records which have never been accessed.
524            continue
525        if row.get('status') == 'started':
526            # We do not reimport started payments.
527            continue
528        for key, value in row.items():
529            # Remove unwanted whitespaces.
530            row[key] = row[key].strip()
531            if not key in OPTIONS.keys():
532                continue
533            conv_name = OPTIONS[key]
534            converter = getattr(Converters, conv_name, None)
535            if converter is None:
536                print "WARNING: cannot find converter %s" % conv_name
537                continue
538            row[key] = converter(row[key], row)
539        try:
540            writer.writerow(row)
541        except:
542            print row['student_id']
543
544    print "Output written to %s" % output_file
545
546
547if __name__ == '__main__':
548    if len(sys.argv) != 2:
549        print 'Usage: %s <filename>' % __file__
550        sys.exit(1)
551    main()
Note: See TracBrowser for help on using the repository browser.