##
## z2log.py
## Read Zope Z2 log from stdin and output SQL data.
##
## Login : <uli@pu.smp.net>
## Started on  Wed Feb  7 03:02:47 2007 Uli Fouquet
## $Id$
## 
## Copyright (C) 2007 Uli Fouquet
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
## 
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
## 
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##

import sys
import re

##
## Output format selector.  Set mode to...
##   "COPY"          : emit PostgreSQL-compatible COPY-syntax output.
##   any other value : emit ordinary (slow) SQL INSERT statements.
##
mode = "COPY"

## Log file to which input lines that could not be processed are appended.
faillog = "/var/log/updatedblog/z2log-failed.log"

## Month abbreviation as it appears in the log -> month number (1..12).
month = dict(zip(
    ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
    range(1, 13)
    ))

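##
## Each input line is expected to look like an nginx access.log line.
## The expression splits such a line into the following fields; the
## field numbers are 0-based indexes into the tuple of matched groups:
##
## Fieldcontent        | Fieldnum
## --------------------+---------
## IP                  | 0
## (second token)      | 1  (captured, but not used in the output)
## USER                | 2
## DATE (dd/Mon/yyyy)  | 3-4-5
## TIME                | 6
## REQUEST             | 7
## STATUS              | 8
## BYTES               | 9
## REFERER             | 10
## CLIENT              | 11 (everything after the quoted referer)
## SITE                | -- 
## MISC                | --
##
## It was hard work. Please do not touch!
##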
## Compiled pattern for one log line.  It yields twelve groups: IP, second
## token, user, day, month name, year, time, request, status, bytes,
## referer and the rest of the line.  The user field is capped at 64
## characters.  Written as a raw string so that the backslashes reach the
## regex engine unchanged; the pattern text itself is unmodified.
expr = re.compile(r'^([0-9\.]+) ([^ ]+) ([^\[]{1,64}) ?\[([0-9]+)\/([^\/]+)\/([0-9]+):(..:..:..) .+\] "([^"]+)" ([0-9]+) ([0-9]+) "([^"]*)"(.*)')



## Running input line number.  It is written as the "ID" column and is
## advanced for every line read, including lines that fail to parse.
lnum = 1


def _copy_escape(value):
    """Escape VALUE for one column of PostgreSQL COPY text-format input.

    Backslash is the COPY escape character and tab is the column
    delimiter, so both (plus CR/LF) must be escaped or the row is corrupted.
    """
    value = value.replace("\\", "\\\\")
    value = value.replace("\t", "\\t")
    value = value.replace("\n", "\\n")
    return value.replace("\r", "\\r")


def _sql_escape(value):
    """Escape VALUE for use inside a single-quoted SQL string literal.

    Single quotes are doubled so that log content can neither break nor
    extend the generated INSERT statement.
    """
    ## NOTE(review): backslashes are left untouched; whether the server
    ## treats them as escapes depends on its string-literal settings.
    return value.replace("'", "''")


def _log_failed(line):
    """Append one rejected input line to the fail log."""
    fd = open(faillog, "a")
    try:
        fd.write(line + "\n")
    finally:
        ## Close the handle even if the write fails.
        fd.close()


## Pick the row template and the matching escape function once.  The
## output is written with sys.stdout.write() plus an explicit newline,
## which produces the same bytes as the print statement did.
if mode == "COPY":
    sys.stdout.write('COPY "z2log" FROM stdin;\n')
    escape = _copy_escape
    rowfmt = "%07d\t%s-%s-%s\t%s\t\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t" + "\n"
else:
    escape = _sql_escape
    rowfmt = "INSERT INTO \"z2log\" (\"ID\",\"DATESTR\",\"TIMESTR\",\"SITE\",\"IP\",\"USER\",\"REQUEST\",\"STATUS\",\"BYTES\",\"REFERER\",\"CLIENT\",\"UNKNOWN\") VALUES ('%07d','%s-%s-%s','%s','','%s','%s','%s','%s','%s','%s','%s','');" + "\n"

## Read line for line.  readline() returns "" only at end of input, so
## EOF is detected before stripping and a blank line in the middle of
## the log no longer ends processing.
for raw in iter(sys.stdin.readline, ""):
    data = raw.strip()

    ## Apply reg. expression (blank lines cannot match, skip the work).
    m = None
    if data:
        m = expr.match(data)

    ## An unknown month name makes the line unusable as well; treat it
    ## like a non-matching line instead of aborting with a KeyError.
    monthnum = None
    if m:
        monthnum = month.get(m.group(5))

    if monthnum is None:
        ## Line could not be converted.  Blank lines are skipped
        ## silently, everything else goes to the fail log.
        if data:
            _log_failed(data)
        lnum += 1
        continue

    res = [escape(field) for field in m.groups()]

    ## The database expects the following fields:
    ##
    ## ID, DATESTR, TIMESTR, SITE, IP, USER, REQUEST,
    ##   STATUS, BYTES, REFERER, CLIENT, MISC
    ##
    ## in that order.  SITE and MISC are always written empty.
    sys.stdout.write(rowfmt % (
        lnum, res[5], monthnum, res[3], res[6],
        res[0], res[2], res[7], res[8],
        res[9], res[10], res[11]
        ))

    lnum += 1

