## $Id: batching.py 12094 2014-11-30 08:12:27Z henrik $
##
## Copyright (C) 2014 Uli Fouquet & Henrik Bettermann
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
"""Batch processing components for customer objects.

Batch processors eat CSV files to add, update or remove large numbers
of certain kinds of objects at once.

Here we define the processors for customer-specific objects like
customers and their subobjects.
"""
import grok
import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
from time import time
from datetime import datetime
from zope.i18n import translate
from zope.interface import Interface
from zope.schema import getFields
from zope.component import queryUtility, getUtility, createObject
from zope.event import notify
from zope.catalog.interfaces import ICatalog
from hurry.workflow.interfaces import IWorkflowState, IWorkflowInfo
from waeup.ikoba.interfaces import (
    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
    IObjectHistory, IGNORE_MARKER)
from waeup.ikoba.interfaces import IIkobaUtils
from waeup.ikoba.interfaces import MessageFactory as _
from waeup.ikoba.documents.utils import generate_document_id
from waeup.ikoba.customers.interfaces import (
    ICustomer, ICustomerUpdateByRegNo,
    ICustomerDocument, IApplication)
from waeup.ikoba.customers.workflow import  (
    IMPORTABLE_REGISTRATION_STATES, IMPORTABLE_REGISTRATION_TRANSITIONS,
    IMPORTABLE_APPLICATION_STATES, IMPORTABLE_APPLICATION_TRANSITIONS)
from waeup.ikoba.utils.batching import BatchProcessor


class CustomerProcessor(BatchProcessor):
    """A batch processor for ICustomer objects.

    A customer is located by its ``customer_id`` or, if no usable
    customer id is given, by its ``reg_number`` (see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'customerprocessor'
    grok.name(util_name)

    name = _('Customer Processor')
    iface = ICustomer
    iface_byregnumber = ICustomerUpdateByRegNo

    location_fields = []
    factory_name = 'waeup.Customer'

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all importable column names.

        These are the schema fields of `iface` plus the locator,
        password and workflow columns handled by this processor.
        """
        fields = getFields(self.iface)
        return sorted(list(set(
            ['customer_id', 'reg_number',
            'password', 'state', 'transition'] + list(fields.keys()))))

    def checkHeaders(self, headerfields, mode='create'):
        """Check whether the given header fields can be imported.

        Raises `FatalCSVError` if state and transition are given at the
        same time, if neither locator column is present, if (in create
        mode) a required field is missing, or if a column name which is
        not marked as ignored (leading ``--``) appears more than once.

        Returns ``True`` otherwise.
        """
        if 'state' in headerfields and 'transition' in headerfields:
            raise FatalCSVError(
                "State and transition can't be  imported at the same time!")
        if 'reg_number' not in headerfields and 'customer_id' \
            not in headerfields:
            raise FatalCSVError(
                "Need at least columns customer_id or reg_number " +
                "for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether the site provides a 'customers' container.
        """
        return 'customers' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the customer.

        'customer_id' takes precedence over 'reg_number'. Returns
        ``None`` if neither column holds a usable value.
        """
        if row.get('customer_id', None) not in (None, IGNORE_MARKER):
            return 'customer_id'
        elif row.get('reg_number', None) not in (None, IGNORE_MARKER):
            return 'reg_number'
        else:
            return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether a customer matching `row` can be found.
        """
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the customers container of `site`.
        """
        return site['customers']

    def getEntry(self, row, site):
        """Return the customer described by `row` or ``None``.

        Depending on the locator, the customer is looked up directly in
        the customers container or searched for in the
        'customers_catalog' by registration number.
        """
        if 'customers' not in site.keys():
            return None
        locator = self.getLocator(row)
        if locator == 'customer_id':
            if row['customer_id'] in site['customers']:
                customer = site['customers'][row['customer_id']]
                return customer
        elif locator == 'reg_number':
            reg_number = row['reg_number']
            cat = queryUtility(ICatalog, name='customers_catalog')
            results = list(
                cat.searchResults(reg_number=(reg_number, reg_number)))
            if results:
                return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add customer `obj` to the customers container.
        """
        parent = self.getParent(row, site)
        parent.addCustomer(obj)
        # Reset _curr_cust_id if customer_id has been imported
        if self.getLocator(row) == 'customer_id':
            parent._curr_cust_id -= 1
        # We have to log this if state is provided. If not,
        # logging is done by the event handler handle_customer_added
        if 'state' in row:
            parent.logger.info('%s - Customer record created' % obj.customer_id)
        history = IObjectHistory(obj)
        history.addMessage(_('Customer record created'))

    def delEntry(self, row, site):
        """Remove the customer described by `row`, if it exists.
        """
        customer = self.getEntry(row, site)
        if customer is not None:
            parent = self.getParent(row, site)
            parent.logger.info('%s - Customer removed' % customer.customer_id)
            del parent[customer.customer_id]

    def checkUpdateRequirements(self, obj, row, site):
        """Checks requirements the object must fulfill when being updated.

        This method is not used in case of deleting or adding objects.

        Returns error messages as strings in case of requirement
        problems.
        """
        transition = row.get('transition', IGNORE_MARKER)
        if transition not in (IGNORE_MARKER, ''):
            allowed_transitions = IWorkflowInfo(obj).getManualTransitionIds()
            if transition not in allowed_transitions:
                return 'Transition not allowed.'
        return None

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        Password, state and transition are handled here and removed
        from `row`; all remaining values are applied by the base class.

        Returns a string listing the changed items. The same string is
        written to the logger of the customers container.
        """
        items_changed = ''

        # Remove customer_id from row if empty
        if 'customer_id' in row and row['customer_id'] in (None, IGNORE_MARKER):
            row.pop('customer_id')

        # Update password
        # XXX: Take DELETION_MARKER into consideration
        if 'password' in row:
            passwd = row.get('password', IGNORE_MARKER)
            if passwd not in ('', IGNORE_MARKER):
                if passwd.startswith('{SSHA}'):
                    # already encrypted password
                    obj.password = passwd
                else:
                    # not yet encrypted password
                    IUserAccount(obj).setPassword(passwd)
                # NOTE(review): the password value (possibly clear-text)
                # becomes part of items_changed and thus of the log
                # line written below -- confirm this is intended.
                items_changed += ('%s=%s, ' % ('password', passwd))
            row.pop('password')

        # Update registration state
        if 'state' in row:
            state = row.get('state', IGNORE_MARKER)
            if state not in (IGNORE_MARKER, ''):
                value = row['state']
                IWorkflowState(obj).setState(value)
                msg = _("State '${a}' set", mapping={'a': value})
                history = IObjectHistory(obj)
                history.addMessage(msg)
                items_changed += ('%s=%s, ' % ('state', state))
            row.pop('state')

        if 'transition' in row:
            transition = row.get('transition', IGNORE_MARKER)
            if transition not in (IGNORE_MARKER, ''):
                value = row['transition']
                IWorkflowInfo(obj).fireTransition(value)
                items_changed += ('%s=%s, ' % ('transition', transition))
            row.pop('transition')

        # apply other values...
        items_changed += super(CustomerProcessor, self).updateEntry(
            obj, row, site, filename)

        # Log actions...
        parent = self.getParent(row, site)
        if hasattr(obj, 'customer_id'):
            # Update mode: the customer exists and we can get the customer_id.
            # Create mode: the record contains the customer_id
            parent.logger.info(
                '%s - %s - %s - updated: %s'
                % (self.name, filename, obj.customer_id, items_changed))
        else:
            # Create mode: the customer does not yet exist
            # XXX: It seems that this never happens because customer_id
            # is always set.
            # obj provides no customer_id in this branch, so the
            # identifier for the log line is taken from the row.
            identifier = row.get('customer_id', row.get('reg_number'))
            parent.logger.info(
                '%s - %s - %s - imported: %s'
                % (self.name, filename, identifier, items_changed))
        return items_changed

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The raw header is read from the first line of the file at
        `path`. In remove mode only the locator columns are mapped;
        columns marked ``--IGNORE--`` are never mapped.
        """
        result = dict()
        # Only the header line is needed; make sure the file is closed
        # again afterwards.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in ['customer_id', 'reg_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns a tuple ``(errs, inv_errs, conv_dict)`` as delivered by
        the object converter, with additional errors for states and
        transitions which are not importable.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            if self.getLocator(row) == 'reg_number':
                iface = self.iface_byregnumber
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name, mode=mode)
        if 'transition' in row:
            if row['transition'] not in IMPORTABLE_REGISTRATION_TRANSITIONS:
                if row['transition'] not in (IGNORE_MARKER, ''):
                    errs.append(('transition', 'not allowed'))
        if 'state' in row:
            if row['state'] not in IMPORTABLE_REGISTRATION_STATES:
                if row['state'] not in (IGNORE_MARKER, ''):
                    errs.append(('state', 'not allowed'))
                else:
                    # State is an attribute of Customer and must not
                    # be changed if empty.
                    conv_dict['state'] = IGNORE_MARKER
        try:
            # Correct cust_id counter. As the IConverter for customers
            # creates customer objects that are not used afterwards, we
            # have to fix the site-wide customer_id counter.
            site = grok.getSite()
            customers = site['customers']
            customers._curr_cust_id -= 1
        except (KeyError, TypeError, AttributeError):
            # Best effort only: without a site or customers container
            # there is no counter to correct.
            pass
        return errs, inv_errs, conv_dict


class CustomerProcessorBase(BatchProcessor):
    """A base for customer subitem processors.

    Helps reducing redundancy. Subclasses are expected to provide
    `iface`, `factory_name`, `getParent` and `getEntry`, which are used
    but not defined here.
    """
    grok.baseclass()

    # additional available fields
    # beside 'customer_id' and 'reg_number'
    additional_fields = []

    #: header fields additionally required
    additional_headers = []

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all importable column names.
        """
        fields = ['customer_id', 'reg_number'] + self.additional_fields
        return sorted(list(set(fields + list(getFields(
                self.iface).keys()))))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check whether the given header fields can be imported.

        Raises `FatalCSVError` if neither locator column is present, if
        one of the `additional_headers` is missing, or if a column name
        which is not marked as ignored (leading ``--``) appears more
        than once. The `mode` is not evaluated here.

        Returns ``True`` otherwise.
        """
        if 'reg_number' not in headerfields and 'customer_id' \
            not in headerfields:
            raise FatalCSVError(
                "Need at least columns customer_id " +
                "or reg_number for import!")
        for name in self.additional_headers:
            if name not in headerfields:
                raise FatalCSVError(
                    "Need %s for import!" % name)

        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _getCustomer(self, row, site):
        """Return the customer referenced by `row` or ``None``.

        A non-empty 'customer_id' takes precedence over 'reg_number';
        registration numbers are searched in the 'customers_catalog'.
        """
        NON_VALUES = ['', IGNORE_MARKER]
        if 'customers' not in site.keys():
            return None
        if row.get('customer_id', '') not in NON_VALUES:
            if row['customer_id'] in site['customers']:
                customer = site['customers'][row['customer_id']]
                return customer
        elif row.get('reg_number', '') not in NON_VALUES:
            reg_number = row['reg_number']
            cat = queryUtility(ICatalog, name='customers_catalog')
            results = list(
                cat.searchResults(reg_number=(reg_number, reg_number)))
            if results:
                return results[0]
        return None

    def parentsExist(self, row, site):
        """Tell whether the parent container for `row` can be found.
        """
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the object described by `row` can be found.
        """
        return self.getEntry(row, site) is not None

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter for `iface`.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name, mode=mode)
        return errs, inv_errs, conv_dict

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The raw header is read from the first line of the file at
        `path`. In remove mode only columns identifying the object are
        mapped; columns marked ``--IGNORE--`` are never mapped.
        """
        # 'application_id' is kept because ApplicationProcessor demands
        # and evaluates this column in remove mode.
        # NOTE(review): presumably unmapped columns never reach the
        # processors -- verify against BatchProcessor.
        remove_fields = ['customer_id', 'reg_number', 'document_id',
                         'application_id', 'code', 'level']
        result = dict()
        # Only the header line is needed; make sure the file is closed
        # again afterwards.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in remove_fields and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

class CustomerDocumentProcessor(CustomerProcessorBase):
    """A batch processor for ICustomerDocument objects.

    Documents live in the 'documents' container of a customer and are
    identified by their ``document_id``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'customerdocumentprocessor'
    grok.name(util_name)

    name = _('CustomerDocument Processor')
    iface = ICustomerDocument
    factory_name = 'waeup.CustomerSampleDocument'

    location_fields = []
    additional_fields = ['document_id', 'class_name']
    additional_headers = ['class_name']

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check whether the given header fields can be imported.

        In addition to the checks of the base class a 'document_id'
        column is demanded in update and remove mode. Raises
        `FatalCSVError` on problems, returns ``True`` otherwise.
        """
        super(CustomerDocumentProcessor, self).checkHeaders(
            headerfields, mode)
        if mode in ('update', 'remove') and 'document_id' not in headerfields:
            raise FatalCSVError(
                "Need document_id for import in update and remove modes!")
        return True

    def getParent(self, row, site):
        """Return the documents container of the customer in `row`.

        Returns ``None`` if the customer cannot be found.
        """
        customer = self._getCustomer(row, site)
        if customer is None:
            return None
        return customer['documents']

    def getEntry(self, row, site):
        """Return the document described by `row` or ``None``.
        """
        documents = self.getParent(row, site)
        if documents is None:
            return None
        document_id = row.get('document_id', None)
        if document_id is None:
            return None
        entry = documents.get(document_id)
        return entry

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        The changes are logged with the logger of the customers
        container. Returns the string of changed items as delivered by
        the base class.
        """
        items_changed = super(CustomerDocumentProcessor, self).updateEntry(
            obj, row, site, filename)
        customer = self.getParent(row, site).__parent__
        customer.__parent__.logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, customer.customer_id, items_changed))
        return items_changed

    def addEntry(self, obj, row, site):
        """Store `obj` in the documents container of the customer.

        The key is the 'document_id' of `row` without leading or
        trailing ``#`` characters.
        """
        parent = self.getParent(row, site)
        document_id = row['document_id'].strip('#')
        parent[document_id] = obj

    def delEntry(self, row, site):
        """Remove the document described by `row`, if it exists.
        """
        document = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if document is not None:
            customer = self._getCustomer(row, site)
            customer.__parent__.logger.info('%s - Document removed: %s'
                % (customer.customer_id, document.document_id))
            del parent[document.document_id]

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        On top of the converter checks of the base class, the
        'class_name' must match the factory of this processor (except
        in remove mode) and the 'document_id' is checked: in create
        mode a missing id is generated and a given id must not be in
        use; in all other modes the id must start with ``d``.

        Returns a tuple ``(errs, inv_errs, conv_dict)``.
        """
        errs, inv_errs, conv_dict = super(
            CustomerDocumentProcessor, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode != 'remove':
            class_name = row.get('class_name', None)
            # Cut off the 'waeup.' prefix only. str.strip() would treat
            # its argument as a set of characters and could also eat
            # leading or trailing letters of the class name itself.
            prefix = 'waeup.'
            expected = self.factory_name
            if expected.startswith(prefix):
                expected = expected[len(prefix):]
            if class_name != expected:
                errs.append(('class_name','wrong processor'))
        # We have to check document_id.
        document_id = row.get('document_id', None)
        if mode == 'create':
            if not document_id:
                document_id = generate_document_id()
                conv_dict['document_id'] = document_id
                return errs, inv_errs, conv_dict
            cat = queryUtility(ICatalog, name='documents_catalog')
            results = list(
                cat.searchResults(document_id=(document_id, document_id)))
            if results:
                # document_id must not exist.
                errs.append(('document_id','id exists'))
        else:
            # A missing id is reported like a malformed one instead of
            # failing on None.startswith().
            if not document_id or not document_id.startswith('d'):
                errs.append(('document_id','invalid format'))
        return errs, inv_errs, conv_dict


class ApplicationProcessor(CustomerProcessorBase):
    """A batch processor for IApplication objects.

    Applications live in the 'applications' container of a customer
    and are identified by their ``application_id``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'applicationprocessor'
    grok.name(util_name)

    name = _('Application Processor')
    iface = IApplication
    factory_name = 'waeup.SampleApplication'

    location_fields = []
    additional_fields = ['application_id', 'class_name']
    additional_headers = ['class_name']

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check whether the given header fields can be imported.

        In addition to the checks of the base class an 'application_id'
        column is demanded in update and remove mode. Raises
        `FatalCSVError` on problems, returns ``True`` otherwise.
        """
        super(ApplicationProcessor, self).checkHeaders(headerfields, mode)
        if mode in ('update', 'remove') and \
            'application_id' not in headerfields:
            raise FatalCSVError(
                "Need application_id for import in update and remove modes!")
        return True

    def getParent(self, row, site):
        """Return the applications container of the customer in `row`.

        Returns ``None`` if the customer cannot be found.
        """
        customer = self._getCustomer(row, site)
        if customer is None:
            return None
        return customer['applications']

    def getEntry(self, row, site):
        """Return the application described by `row` or ``None``.
        """
        applications = self.getParent(row, site)
        if applications is None:
            return None
        application_id = row.get('application_id', None)
        if application_id is None:
            return None
        entry = applications.get(application_id)
        return entry

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        The changes are logged with the logger of the customers
        container. Returns the string of changed items as delivered by
        the base class.
        """
        items_changed = super(ApplicationProcessor, self).updateEntry(
            obj, row, site, filename)
        customer = self.getParent(row, site).__parent__
        customer.__parent__.logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, customer.customer_id, items_changed))
        return items_changed

    def addEntry(self, obj, row, site):
        """Store `obj` in the applications container of the customer.

        The key is the 'application_id' of `row` without leading or
        trailing ``#`` characters.
        """
        parent = self.getParent(row, site)
        application_id = row['application_id'].strip('#')
        parent[application_id] = obj

    def delEntry(self, row, site):
        """Remove the application described by `row`, if it exists.
        """
        application = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if application is not None:
            customer = self._getCustomer(row, site)
            customer.__parent__.logger.info('%s - Application removed: %s'
                % (customer.customer_id, application.application_id))
            del parent[application.application_id]

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        On top of the converter checks of the base class, the
        'class_name' must match the factory of this processor (except
        in remove mode) and the 'application_id' is checked: in create
        mode a missing id is generated and a given id must not be in
        use; in all other modes the id must start with ``a``.

        Returns a tuple ``(errs, inv_errs, conv_dict)``.
        """
        errs, inv_errs, conv_dict = super(
            ApplicationProcessor, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode != 'remove':
            class_name = row.get('class_name', None)
            # Cut off the 'waeup.' prefix only. str.strip() would treat
            # its argument as a set of characters and could also eat
            # leading or trailing letters of the class name itself.
            prefix = 'waeup.'
            expected = self.factory_name
            if expected.startswith(prefix):
                expected = expected[len(prefix):]
            if class_name != expected:
                errs.append(('class_name','wrong processor'))
        # We have to check application_id.
        application_id = row.get('application_id', None)
        if mode == 'create':
            if not application_id:
                # NOTE(review): generate_application_id is not among the
                # names imported at the top of this module; unless it is
                # provided elsewhere this call raises NameError. The
                # matching import has to be added -- TODO confirm where
                # the function is defined.
                application_id = generate_application_id()
                conv_dict['application_id'] = application_id
                return errs, inv_errs, conv_dict
            cat = queryUtility(ICatalog, name='applications_catalog')
            results = list(
                cat.searchResults(application_id=(application_id, application_id)))
            if results:
                # application_id must not exist.
                errs.append(('application_id','id exists'))
        else:
            # A missing id is reported like a malformed one instead of
            # failing on None.startswith().
            if not application_id or not application_id.startswith('a'):
                errs.append(('application_id','invalid format'))
        return errs, inv_errs, conv_dict