1 | ## |
---|
2 | ## zaccess-nginx.py |
---|
3 | ## Read nginx-access log from stdin and output SQL data. |
---|
4 | ## |
---|
5 | ## Login : <uli@pu.smp.net> |
---|
6 | ## Started on Wed Feb 7 03:02:47 2007 Uli Fouquet |
---|
7 | ## $Id$ |
---|
8 | ## |
---|
9 | ## Copyright (C) 2007 Uli Fouquet |
---|
10 | ## This program is free software; you can redistribute it and/or modify |
---|
11 | ## it under the terms of the GNU General Public License as published by |
---|
12 | ## the Free Software Foundation; either version 2 of the License, or |
---|
13 | ## (at your option) any later version. |
---|
14 | ## |
---|
15 | ## This program is distributed in the hope that it will be useful, |
---|
16 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
17 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
18 | ## GNU General Public License for more details. |
---|
19 | ## |
---|
20 | ## You should have received a copy of the GNU General Public License |
---|
21 | ## along with this program; if not, write to the Free Software |
---|
22 | ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
23 | ## |
---|
24 | |
---|
25 | import sys |
---|
26 | import re |
---|
27 | |
---|
##
## Output mode:
##   "COPY"        : write PostgreSQL COPY-syntax rows.
##   anything else : write one ordinary (slow) SQL statement per log line.
##
mode = "COPY"

## File that collects the input lines which could not be processed.
faillog = "/var/log/updatedblog/zaccess-failed.log"

## Month abbreviation -> month number (1 = January ... 12 = December).
## Built from the ordered tuple below, so each number is position + 1.
month = dict(
    (abbrev, position + 1)
    for position, abbrev in enumerate((
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ))
)
51 | |
---|
##
## Regular expression for one line of the nginx access log.
##
## Index into match.groups() of every captured field:
##
##   Fieldcontent                     | Fieldnum
##   ---------------------------------+---------
##   IP                               | 0
##   (second field, captured only)    | 1
##   USER                             | 2
##   DATE (day, month, year)          | 3, 4, 5
##   TIME (hh:mm:ss)                  | 6
##   REQUEST   (1 to 1024 chars)      | 7
##   STATUS                           | 8
##   BYTES                            | 9
##   REFERER   (1 to 1012 chars)      | 10
##   CLIENT    (1 to 1012 chars)      | 11
##   (wrapper of the optional tail)   | 12
##   SITE                             | 13
##   MISC (zip-ratio etc.)            | 14
##
## Fields 12-14 belong to an optional tail; they are None when a line
## ends right after the CLIENT field.
##
## The pattern is written as raw string literals so that regex escapes
## such as \. and \[ are not read as (invalid) Python string escapes.
## The compiled pattern itself is unchanged -- it was hard work, so
## please keep it that way.
##
expr = re.compile(
    r'^([0-9\.]+) ([^ ]+) ([^\[]+) '                      # 0, 1, 2
    r'\[([0-9]+)\/([^\/]+)\/([0-9]+):(..:..:..) .+\] '    # 3, 4, 5, 6
    r'"([^"]{1,1024})" ([0-9]+) ([0-9]+) '                # 7, 8, 9
    r'"([^"]{1,1012})" "([^"]{1,1012})"'                  # 10, 11
    r'( "([^"]+)" (.*))?$'                                # 12, 13, 14
)
75 | |
---|
76 | |
---|
## Replacements needed inside one field of PostgreSQL text-format COPY
## data.  The backslash must come first, otherwise the backslashes added
## for the other characters would be escaped a second time.
_COPY_ESCAPES = (
    ('\\', '\\\\'),
    ('\t', '\\t'),
    ('\n', '\\n'),
    ('\r', '\\r'),
)

## Fixed head of the INSERT statement, up to the opening quote of the
## first value.
## NOTE(review): the last column is named "UNKNOWN" here, while the field
## list further down calls it MISC -- confirm against the table schema.
_INSERT_HEAD = (
    'INSERT INTO "ZACCESS" ("ID","DATESTR","TIMESTR","SITE","IP",'
    '"USER","REQUEST","STATUS","BYTES","REFERER","CLIENT","UNKNOWN")'
    " VALUES ('"
)


def _copy_escape(value):
    """Return `value` escaped for use as a single COPY text-format field."""
    for char, escaped in _COPY_ESCAPES:
        value = value.replace(char, escaped)
    return value


def _sql_escape(value):
    """Return `value` with single quotes doubled for a quoted SQL literal.

    NOTE(review): a server that treats backslashes inside '...' literals
    as escapes would need those handled as well -- confirm the setting.
    """
    return value.replace("'", "''")


def _log_failure(line):
    """Append an input line that could not be processed to the fail log."""
    fd = open(faillog, "a")
    try:
        fd.write(str(line) + '\n')
    finally:
        # Close the handle even if the write fails.
        fd.close()


def _render(number, res):
    """Return the output text for one parsed log line.

    `number` is the line number used as row ID and `res` the tuple of
    groups captured by `expr`.  Depending on `mode` the result is a
    tab-separated COPY row or a complete INSERT statement.
    """
    ## The database expects the following fields:
    ##
    ##   ID, DATESTR, TIMESTR, SITE, IP, USER, REQUEST,
    ##   STATUS, BYTES, REFERER, CLIENT, MISC
    ##
    ## in that order.
    head = ["%07d" % number,
            "%s-%s-%s" % (res[5], month[res[4]], res[3])]
    # SITE and MISC come from the optional tail of the pattern and are
    # None when it is missing; they are written as empty strings then.
    tail = [res[6], res[13] or '', res[0], res[2], res[7], res[8],
            res[9], res[10], res[11], res[14] or '']

    if mode == "COPY":
        return "\t".join(head + [_copy_escape(field) for field in tail])
    return (_INSERT_HEAD
            + "','".join(head + [_sql_escape(field) for field in tail])
            + "');")


## Number of the current input line; it becomes the ID of the row.
## Lines that end up in the fail log use up a number as well.
lnum = 1

if mode == "COPY":
    print('COPY "ZACCESS" FROM stdin;')

## Read line for line until readline() reports end of input ('').
## Every line is stripped the same way, so a blank line in the middle of
## the log no longer looks like end of input.
for data in iter(sys.stdin.readline, ''):
    data = data.strip()

    ## Apply reg. expression...
    m = expr.match(data)
    res = None
    if m is not None:
        res = m.groups()

    if res is None or res[4] not in month:
        # Line did not match, or carries a month name missing from the
        # month table: keep it in the fail log instead of aborting.
        _log_failure(data)
    else:
        print(_render(lnum, res))

    lnum += 1
124 | |
---|