deploy apache log anonymizer
author Ralf Jung <post@ralfj.de>
Tue, 1 May 2018 16:25:51 +0000 (18:25 +0200)
committer Ralf Jung <post@ralfj.de>
Tue, 1 May 2018 16:25:51 +0000 (18:25 +0200)
roles/apache/files/log-anon [new file with mode: 0644]
roles/apache/tasks/main.yml

diff --git a/roles/apache/files/log-anon b/roles/apache/files/log-anon
new file mode 100644 (file)
index 0000000..51775a0
--- /dev/null
@@ -0,0 +1,39 @@
+#!/usr/bin/python
+import sys, re
+from netaddr import IPAddress
+
+if len(sys.argv) != 2:
+       print >>sys.stderr, "Usage: %s filename" % sys.argv[0]
+       sys.exit(1)
+
+log = open(sys.argv[1], "a")
+
+# group 1 must be the prefix, group 2 the IP, group 3 the suffix
+ipmatch = r"([0-9a-f.:]+)"
+accesslog = re.compile(r"^(\S+ )"+ipmatch+r"( .*)$")
+errorlog = re.compile(r"^(.* \[client )"+ipmatch+r"(\] .*)$")
+
+while True:
+       line = sys.stdin.readline()
+       if not line: break
+       # process this line
+       match = accesslog.search(line)
+       if match is None:
+               match = errorlog.search(line)
+       # check what we got
+       if match is None:
+               # unknown line
+               log.write(line)
+       else:
+               prefix = match.group(1)
+               ip = match.group(2)
+               suffix = match.group(3)
+               try:
+                       ip = IPAddress(ip) # parse the addres
+                       ip = ip & (IPAddress('255.255.255.0') if ip.version == 4 else IPAddress('ffff:ffff:ffff::')) # mask out a bunch of bits
+                       # now we have a parsed representation of the IP address we want to print
+                       log.write(prefix+str(ip)+suffix+"\n")
+               except (ValueError, netaddr.core.AddrFormatError):
+                       # not actually an IP address...
+                       log.write(line)
+       log.flush()
index 12f9580c632affec50528d7558f1022e817eb2c7..a4d1b248863301e6eebfe718d97e5381c5ac65fb 100644 (file)
@@ -1,5 +1,5 @@
 - name: install apache
 - name: install apache
-  apt: name=apache2 state=latest
+  apt: name=apache2,python-netaddr state=latest
 - name: enable apache
   service: name=apache2 enabled=yes
 # config
 - name: enable apache
   service: name=apache2 enabled=yes
 # config
   loop:
   - access_compat
   notify: apache
   loop:
   - access_compat
   notify: apache
+- name: install log anonymization script
+  copy:
+    dest: /etc/apache2/log-anon
+    src: files/log-anon  # the log-anon script this commit adds under roles/apache/files
+    mode: +x  # symbolic mode: add the execute bit
 - name: install shared config files
   copy:
     dest: /etc/apache2/conf-available/{{ item }}
 - name: install shared config files
   copy:
     dest: /etc/apache2/conf-available/{{ item }}