etherpad: configure DB charset like docs recommend
[ansible.git] / roles / apache / files / log-anon
1 #!/usr/bin/python
2 import sys, re
3 from netaddr import IPAddress, AddrFormatError
4
# Each pattern must expose exactly three groups:
# group 1 is the prefix, group 2 the IP, group 3 the suffix.
ipmatch = r"([0-9a-f.:]+)"
accesslog = re.compile(r"^(\S+ )"+ipmatch+r"( .*)$")
errorlog = re.compile(r"^(.* \[client )"+ipmatch+r"(:[0-9]+\] .*)$")


def _mask_ip(text):
    """Parse *text* as an IP address and return it, as a string, with the
    low bits zeroed.

    IPv4 addresses are masked with 255.255.255.0 and IPv6 addresses with
    ffff:ffff:ffff::.  Parsing errors from netaddr are not handled here;
    the caller catches ValueError and AddrFormatError.
    """
    ip = IPAddress(text)  # parse the address
    if ip.version == 4:
        mask = IPAddress('255.255.255.0')
    else:
        mask = IPAddress('ffff:ffff:ffff::')
    return str(ip & mask)


def anonymize_line(line):
    """Return *line* with the client IP address masked, if one is found.

    The access-log pattern is tried first, then the error-log pattern.  The
    first pattern whose captured token really parses as an IP address wins,
    and the line is rebuilt as prefix + masked IP + suffix + newline.  A line
    that matches neither pattern, or whose captured tokens are not IP
    addresses, is returned unchanged.
    """
    for pattern in (accesslog, errorlog):
        match = pattern.search(line)
        if match is None:
            continue
        try:
            masked = _mask_ip(match.group(2))
        except (ValueError, AddrFormatError):
            # Not actually an IP address.  Keep going rather than giving up,
            # so that a bogus access-log match cannot hide a real
            # "[client ip:port]" field later in the same line.
            continue
        return match.group(1) + masked + match.group(3) + "\n"
    # unknown line
    return line


def main(argv, stream=None):
    """Copy lines from *stream* to the file named by argv[1], masking IPs.

    argv   -- command-line style list: [program name, log file name].
    stream -- object with a readline() method to read from; defaults to
              sys.stdin.

    The log file is opened in append mode.  Returns the process exit status:
    1 after printing a usage message when argv does not have exactly two
    entries, 0 once the input is exhausted.
    """
    if len(argv) != 2:
        # sys.stderr.write works on both Python 2 and Python 3, unlike the
        # Python 2-only "print >>sys.stderr" statement.
        sys.stderr.write("Usage: %s filename\n" % argv[0])
        return 1
    if stream is None:
        stream = sys.stdin

    with open(argv[1], "a") as log:
        # readline() is called explicitly instead of iterating over the
        # stream: Python 2's file iterator reads ahead, which would delay
        # lines arriving on a pipe.  An empty string signals end of input.
        for line in iter(stream.readline, ""):
            log.write(anonymize_line(line))
            # Flush after every line so each entry reaches the file as soon
            # as it has been processed instead of waiting in the buffer.
            log.flush()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))