"""Count how often each requester IP address appears in an IIS log file.

Reads the log, tallies the IP address that follows the timestamp on each
line, and writes a report listing the total number of lines read followed
by one ``ip<TAB>:<TAB>count`` row per address, most frequent first.
"""
from __future__ import with_statement

import os
import re

# Default input log and output report locations used when run as a script.
LOG_PATH = "~/Downloads/ex090707.log"
RESULT_PATH = "./sort_results.txt"

# finds the requester's IP address from a log line such as:
# 2009-07-07 19:39:39 78.101.206.196 - W3SVC126 NT49 216.81.77.197 80 GET /index.asp - 200 0 21899 256 375 HTTP/1.1 Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.1;+SV1;+.NET+CLR+1.1.4322) - -
regex = re.compile(
    r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} (?P<ip>[0-9\.]{7,15}) .*"
)


def count_ips(lines):
    """Tally the IP addresses found in an iterable of log lines.

    Args:
        lines: iterable of strings, one log line each.

    Returns:
        A ``(counter, total_lines)`` tuple. ``counter`` maps each matched IP
        address string to its number of occurrences; ``total_lines`` is the
        number of lines consumed, whether or not they matched the pattern.
    """
    counter = {}
    total_lines = 0
    for line in lines:
        # The pattern has a single group, so findall yields the IP strings.
        for ip_address in regex.findall(line):
            counter[ip_address] = counter.get(ip_address, 0) + 1
        total_lines += 1
    return counter, total_lines


def sort_counts(counter):
    """Return ``(ip, count)`` pairs ordered by count, then IP, descending.

    Args:
        counter: mapping of IP address string to occurrence count.

    Returns:
        A list of ``(ip, count)`` tuples, highest count first; ties are
        broken by the IP string in descending lexical order.
    """
    return sorted(
        counter.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )


def main(log_path=LOG_PATH, result_path=RESULT_PATH):
    """Read the log at ``log_path`` and write the report to ``result_path``.

    Args:
        log_path: path of the log file to read; a leading ``~`` is expanded
            to the user's home directory.
        result_path: path of the report file to create or overwrite.

    Raises:
        IOError / OSError: if the log cannot be read or the report cannot
            be written.
    """
    # open() does not expand "~" by itself, so resolve it explicitly.
    with open(os.path.expanduser(log_path)) as current_file:
        # Iterate the file object directly to avoid loading the whole log.
        counter, total_lines = count_ips(current_file)

    # The with-block guarantees the report is closed even if a write fails.
    with open(result_path, 'w') as result:
        result.write(str(total_lines) + ' total\n\n\n')
        for k, c in sort_counts(counter):
            result.write(k + '\t:\t' + str(c) + '\n')


if __name__ == "__main__":
    main()