source: WAeUP_SysConf/logging/trunk/usr/local/lib/zaccess-nginx.py @ 3824

Last change on this file since 3824 was 2826, checked in by uli, 17 years ago

Added logging scripts and configs.

File size: 3.4 KB
Line 
1##
2## zaccess-nginx.py
3## Read nginx-access log from stdin and output SQL data.
4##
5## Login : <uli@pu.smp.net>
6## Started on  Wed Feb  7 03:02:47 2007 Uli Fouquet
7## $Id$
8##
9## Copyright (C) 2007 Uli Fouquet
10## This program is free software; you can redistribute it and/or modify
11## it under the terms of the GNU General Public License as published by
12## the Free Software Foundation; either version 2 of the License, or
13## (at your option) any later version.
14##
15## This program is distributed in the hope that it will be useful,
16## but WITHOUT ANY WARRANTY; without even the implied warranty of
17## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18## GNU General Public License for more details.
19##
20## You should have received a copy of the GNU General Public License
21## along with this program; if not, write to the Free Software
22## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23##
24
25import sys
26import re
27
##
## Set mode to...
##   "COPY":  give PostgreSQL-compatible COPY-syntax output.
##   other : give usual (slow) SQL-output (one INSERT per log line)
##
mode = "COPY"

## Log lines that cannot be parsed are appended to this file.
faillog = "/var/log/updatedblog/zaccess-failed.log"

## Month abbreviations as they appear in the log timestamp -> month number.
month = {
    "Jan": 1,
    "Feb": 2,
    "Mar": 3,
    "Apr": 4,
    "May": 5,
    "Jun": 6,
    "Jul": 7,
    "Aug": 8,
    "Sep": 9,
    "Oct": 10,
    "Nov": 11,
    "Dec": 12,
    }

##
## The format of an nginx access.log line is like this...
## The expression looks for the following fields:
##
## Fieldcontent          | Fieldnum
## ----------------------+---------
## IP                    | 0
## USER                  | 2
## DATE (dd/Mon/yyyy)    | 3-4-5
## TIME                  | 6
## REQUEST               | 7
## STATUS                | 8
## BYTES                 | 9
## REFERER               | 10
## CLIENT                | 11
## SITE                  | 13
## MISC(zip-ratio etc.)  | 14
##
## (just a dummy)        | 12
##
## Fields 12-14 belong to an optional trailing part and are None when a
## line does not carry it.
##
## It was hard work. Please do not touch!
## (The pattern is unchanged; it is only written as a raw string so that
## the regex escapes are not treated as string escapes.)
##
expr = re.compile(
    r'^([0-9\.]+) ([^ ]+) ([^\[]+) \[([0-9]+)\/([^\/]+)\/([0-9]+):(..:..:..) .+\]'
    r' "([^"]{1,1024})" ([0-9]+) ([0-9]+) "([^"]{1,1012})" "([^"]{1,1012})"'
    r'( "([^"]+)" (.*))?$'
    )

## Output templates.  The database expects the following fields:
##
## ID, DATESTR, TIMESTR, SITE, IP, USER, REQUEST,
##   STATUS, BYTES, REFERER, CLIENT, MISC
##
## in that order.
##
## NOTE(review): the INSERT statement names the last column "UNKNOWN" while
## the field list above calls it MISC -- confirm against the table schema.
_COPY_HEADER = 'COPY "ZACCESS" FROM stdin;'
_COPY_ROW = "%07d\t%s-%s-%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
_INSERT_ROW = (
    "INSERT INTO \"ZACCESS\" (\"ID\",\"DATESTR\",\"TIMESTR\",\"SITE\",\"IP\","
    "\"USER\",\"REQUEST\",\"STATUS\",\"BYTES\",\"REFERER\",\"CLIENT\",\"UNKNOWN\")"
    " VALUES ('%07d','%s-%s-%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s');"
    )


def _sql_quote(value):
    """Return *value* with single quotes doubled for use inside '...'.

    This is the standard SQL way of embedding a quote in a string literal.
    NOTE(review): backslashes are passed through unchanged; a server that
    treats backslash as an escape character inside plain literals would
    need additional handling -- confirm the target server's settings.
    """
    return value.replace("'", "''")


def _log_failure(path, line):
    """Append the unparseable *line* to the fail log at *path*."""
    with open(path, "a") as fd:
        fd.write(line + "\n")


def format_record(lnum, line, output_mode=None):
    """Convert one access-log line into one line of output.

    lnum        -- number of the line in the input; becomes the ID column.
    line        -- the log line, without surrounding whitespace.
    output_mode -- "COPY" for a tab-separated COPY row, anything else for
                   an INSERT statement.  Defaults to the module-level
                   ``mode``.

    Returns the formatted string (without trailing newline), or None when
    the line does not match ``expr`` or carries a month abbreviation that
    is not in ``month``.
    """
    if output_mode is None:
        output_mode = mode

    m = expr.match(line)
    if not m:
        return None
    res = m.groups()

    # The pattern accepts any token between the slashes of the date, so an
    # unexpected month must be rejected here instead of raising KeyError.
    month_num = month.get(res[4])
    if month_num is None:
        return None

    # Everything after the date, in database column order.  SITE and MISC
    # come from the optional trailing part and may be missing.
    values = (
        res[6], res[13] or '', res[0], res[2], res[7], res[8],
        res[9], res[10], res[11], res[14] or '',
        )

    if output_mode == "COPY":
        # Field values are written verbatim into the COPY row.
        template = _COPY_ROW
    else:
        # Request, referer and client are supplied by the remote side;
        # quote them so they cannot terminate the SQL string literal.
        template = _INSERT_ROW
        values = tuple([_sql_quote(v) for v in values])

    return template % ((lnum, res[5], month_num, res[3]) + values)


def process(infile, outfile, failpath=None, output_mode=None):
    """Read access-log lines from *infile* and write SQL data to *outfile*.

    infile      -- file-like object providing ``readline()``.
    outfile     -- file-like object providing ``write()``.
    failpath    -- file that receives lines which could not be converted;
                   defaults to the module-level ``faillog``.
    output_mode -- see ``format_record``; defaults to the module-level
                   ``mode``.

    Every input line is counted, so the ID of a record equals the number
    of its line in the input.  Blank lines are counted but otherwise
    skipped; they no longer end the run.  Lines that cannot be converted
    are appended to *failpath* and processing continues.
    """
    if failpath is None:
        failpath = faillog
    if output_mode is None:
        output_mode = mode

    if output_mode == "COPY":
        outfile.write(_COPY_HEADER + "\n")

    lnum = 0
    # readline() returns "" only at end of input, so a blank line ("\n")
    # does not stop the loop.
    for raw in iter(infile.readline, ""):
        lnum += 1
        line = raw.strip()
        if not line:
            continue

        record = format_record(lnum, line, output_mode)
        if record is None:
            _log_failure(failpath, line)
            continue

        outfile.write(record + "\n")


if __name__ == "__main__":
    process(sys.stdin, sys.stdout)
124
Note: See TracBrowser for help on using the repository browser.