##
## zaccess-nginx.py
## Read nginx-access log from stdin and output SQL data.
##
## Login   :
## Started on  Wed Feb  7 03:02:47 2007 Uli Fouquet
## $Id$
##
## Copyright (C) 2007 Uli Fouquet
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
import sys
import re

##
## Set mode to...
##   "COPY": give PostGres-compatible COPY-syntax output.
##   other : give usual (slow) SQL-output
##
mode = "COPY"

## Lines that cannot be parsed are appended to this file.
faillog = "/var/log/updatedblog/zaccess-failed.log"

## Month abbreviation (as written in the log timestamp) -> month number.
month = {
    "Jan": 1,
    "Feb": 2,
    "Mar": 3,
    "Apr": 4,
    "May": 5,
    "Jun": 6,
    "Jul": 7,
    "Aug": 8,
    "Sep": 9,
    "Oct": 10,
    "Nov": 11,
    "Dec": 12,
}

##
## The format of an nginx access.log line is like this...
## The expression looks for the following fields:
##
##  Fieldcontent        | Fieldnum
##  --------------------+---------
##  IP                  | 0
##  USER                | 2
##  DATE (dd-mm-yyyy)   | 3-4-5
##  TIME                | 6
##  REQUEST             | 7
##  STATUS              | 8
##  BYTES               | 9
##  REFERER             | 10
##  CLIENT              | 11
##  SITE                | 13
##  MISC(zip-ratio etc.)| 14
##
##  (just a dummy)      | 12
##
## It was hard work. Please do not touch!
##
## (The pattern itself is unchanged; it is merely written as a raw string
## so that it no longer depends on Python passing unknown backslash
## escapes through untouched.)
##
expr = re.compile(
    r'^([0-9\.]+) ([^ ]+) ([^\[]+) '
    r'\[([0-9]+)\/([^\/]+)\/([0-9]+):(..:..:..) .+\] '
    r'"([^"]{1,1024})" ([0-9]+) ([0-9]+) '
    r'"([^"]{1,1012})" "([^"]{1,1012})"'
    r'( "([^"]+)" (.*))?$'
)

## Column list of the slow INSERT output.
## NOTE(review): the last column is called "UNKNOWN" here while the field
## table above calls it MISC -- confirm against the real table schema.
INSERT_PREFIX = (
    'INSERT INTO "ZACCESS" ("ID","DATESTR","TIMESTR","SITE","IP","USER",'
    '"REQUEST","STATUS","BYTES","REFERER","CLIENT","UNKNOWN") VALUES ('
)


def escape_copy(value):
    """Escape *value* for the text format of PostgreSQL's COPY.

    Backslash, tab, carriage return and newline are special inside a COPY
    data row; left unescaped, a tab inside e.g. a user agent would shift
    all following columns.
    """
    return (
        value.replace("\\", "\\\\")
        .replace("\t", "\\t")
        .replace("\r", "\\r")
        .replace("\n", "\\n")
    )


def escape_sql(value):
    """Double single quotes so *value* can sit inside a '...' SQL literal.

    NOTE(review): servers running with standard_conforming_strings=off
    additionally treat backslashes as escapes; this only handles quotes.
    """
    return value.replace("'", "''")


def format_record(lnum, data, out_mode=None):
    """Turn one access-log line into one output row.

    lnum     -- line number, used as the zero-padded ID column.
    data     -- the log line without its trailing newline.
    out_mode -- "COPY" for a tab-separated COPY row, anything else for an
                INSERT statement; defaults to the module-level `mode`.

    Returns the row without a trailing newline, or None when the line does
    not match `expr` or carries a month abbreviation missing from `month`.
    """
    if out_mode is None:
        out_mode = mode

    m = expr.match(data)
    if not m:
        return None
    res = m.groups()

    # The pattern accepts any token as month; an unknown one is treated as
    # an unparsable line instead of aborting the whole run with KeyError.
    month_num = month.get(res[4])
    if month_num is None:
        return None

    ## The database expects the following fields:
    ##
    ## ID, DATESTR, TIMESTR, SITE, IP, USER, REQUEST,
    ## STATUS, BYTES, REFERER, CLIENT, MISC
    ##
    ## in that order.
    ident = "%07d" % lnum
    datestr = "%s-%s-%s" % (res[5], month_num, res[3])
    # SITE and MISC belong to the optional tail of the pattern and are
    # None when that tail is absent.
    rest = (
        res[6], res[13] or '', res[0], res[2], res[7],
        res[8], res[9], res[10], res[11], res[14] or '',
    )

    if out_mode == "COPY":
        return "\t".join([ident, datestr] + [escape_copy(v) for v in rest])

    quoted = ["'%s'" % v for v in [ident, datestr] + [escape_sql(v) for v in rest]]
    return INSERT_PREFIX + ",".join(quoted) + ");"


def log_failure(path, data):
    """Append the unparsable line *data* to the fail log at *path*."""
    with open(path, "a") as fd:
        fd.write(data + "\n")


def convert(lines, out, out_mode=None, fail_path=None):
    """Convert every log line from *lines* and write the rows to *out*.

    lines     -- iterable of raw log lines (trailing newlines allowed).
    out       -- object with a write() method receiving the output.
    out_mode  -- see format_record(); defaults to the module-level `mode`.
    fail_path -- file that collects unparsable lines; defaults to the
                 module-level `faillog`.

    Every input line advances the line number that becomes the ID column.
    Blank lines are skipped (they used to end processing prematurely);
    other unparsable lines are appended to the fail log.
    """
    if out_mode is None:
        out_mode = mode
    if fail_path is None:
        fail_path = faillog

    if out_mode == "COPY":
        out.write('COPY "ZACCESS" FROM stdin;\n')

    for lnum, raw in enumerate(lines, 1):
        # Strip every line the same way, including the very first one.
        data = raw.strip()
        if not data:
            continue
        row = format_record(lnum, data, out_mode)
        if row is None:
            log_failure(fail_path, data)
            continue
        out.write(row + "\n")


def main():
    """Read the access log from stdin and write the SQL data to stdout."""
    # readline() keeps the original line-at-a-time reading; '' marks EOF.
    convert(iter(sys.stdin.readline, ""), sys.stdout)


if __name__ == "__main__":
    main()