/* * Web server log sorting utility. * Copyright (C) 2001 Stephen Ostermiller * http://ostermiller.org/contact.pl?regarding=webalizer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * See COPYING.TXT for details. */ import java.util.*; import java.text.*; import java.io.*; /** * Sorts web logs that are in combined log format. *
* Log files are assumed to be semi-sorted. If * too many log entries are out of order, an error * message will be printed and you should try again * with a larger look ahead pool size. *
* If P is the look ahead pool size, N is the number of log entries * and F is the number of files, this program will run * in order N*F*Log(P) time. (Notice that it is not optimized * for a large number of files and that it will run faster * with a smaller look ahead pool.) *
*
* This program reads either from standard input (with no arguments)
* or from each of the files specified as arguments. Output is always
* written to standard output, errors to standard error. If you
* wish to sort logs and remove duplicate entries or if you want to
* bump up the look ahead pool size, simply recompile the program with
* those options.
* java LogSorter access.log > sorted_access.log
* java LogSorter server1_access.log server2_access.log server3_access.log > sorted_access.log
* cat access1999.log access2000.log access2001.log | java LogSorter > sorted_access.log
* java LogSorter access.log | webalizer
* java LogSorter access.log | gzip > sorted_access.log.gz
* gunzip -c access.log.gz | java LogSorter | gzip > sorted_access.log.gz
*/
public class LogSorter {
/**
 * Default look ahead pool size, used when a constructor is not given
 * one explicitly. Edit and recompile to change it.
 */
private static int LOOKAHEAD_POOL_SIZE = 8192;
/**
 * Compile-time option for removing duplicate log entries; off by default.
 * NOTE(review): where this flag is consulted is outside this view.
 */
private static boolean REMOVE_DUPLICATES = false;
/** Readers over the input logs; the array is sized to the number of input streams. */
private BufferedReader[] logs;
// NOTE(review): presumably the earliest not-yet-consumed entry of each log,
// indexed in parallel with logs -- confirm against the code that fills it.
private LogEntry[] firstEntries;
// Look ahead pool size for this instance.
// NOTE(review): presumably assigned from the constructor argument of the same name -- confirm.
private int lookAhead;
/**
 * Date format for timestamps laid out as day/month-name/year:hour:minute:second,
 * for example 10/Oct/2000:13:55:36.
 * NOTE(review): SimpleDateFormat is not synchronized, and this shared static
 * instance is built without an explicit Locale, so month names follow the
 * default locale and the instance must not be used from several threads at once.
 */
private static SimpleDateFormat logDateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss");
// Sorted set declared without an access modifier, so it is package-private
// (every other field here is private).
// NOTE(review): presumably the look ahead pool of LogEntry objects described
// in the class comment -- confirm.
TreeSet pool = new TreeSet();
// NOTE(review): presumably the entry most recently handed out, kept for
// ordering or duplicate checks -- confirm against its use.
private LogEntry lastReturned;
/**
 * Creates a sorter over the given log streams using the default
 * look ahead pool size ({@code LOOKAHEAD_POOL_SIZE}).
 *
 * @param logs the log input streams
 */
public LogSorter(InputStream[] logs) {
    // Delegate to the two-argument constructor with the default pool size.
    this(logs, LOOKAHEAD_POOL_SIZE);
}
public LogSorter(InputStream[] logs, int lookAhead){
this.logs = new BufferedReader[logs.length];
for (int i=0; i