/*
 * Web server log sorting utility.
 * Copyright (C) 2001 Stephen Ostermiller 
 * http://ostermiller.org/contact.pl?regarding=webalizer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See COPYING.TXT for details.
 */

import java.util.*;
import java.text.*;
import java.io.*;

/**
 * Sorts web logs that are in combined log format.
 * <p>
 * Log files are assumed to be semi-sorted.   If
 * too many log entries are out of order, an error
 * message will be printed and you should try again
 * with a larger look ahead pool size.
 * <p>
 * If P is the look ahead pool size, N is the number of log entries
 * and F is the number of files, this program will run
 * in order N*F*Log(P) time.  (Notice that it is not optimized
 * for a large number of files and that it will run faster
 * with a smaller look ahead pool.)
 * <p>
 * <h3>Usage:</h3>
 * <p>This program either reads form standard input (with no arguments)
 * or from each of the files specified as arguments.  Output always
 * is written to standard output, errors to standard error.  If you
 * wish to sort logs and remove duplicate entries or if you want to
 * bump up the look ahead pool size, simply recompile the program with
 * those options.</p> 
 * <h3>Examples:</h3>
 * <h4>Sorting a singe log file:</h4>
 * <code>java&nbsp;LogSorter&nbsp;access.log&nbsp;>&nbsp;sorted_access.log</code>
 * <h4>Merging log files from several servers:</h4>
 * <code>java&nbsp;LogSorter&nbsp;server1_access.log&nbsp;server2_access.log&nbsp;server3_access.log&nbsp;>&nbsp;sorted_access.log</code>
 * <h4>Merging rotated log files:</h4>
 * <code>cat&nbsp;access1999.log&nbsp;access2000.log&nbsp;access2001.log&nbsp;|&nbsp;java&nbsp;LogSorter&nbsp;>&nbsp;sorted_access.log</code><br>
 * (concatinating all the log files and sending them to standard input has
 * effect of running this program with one log file rather than F files.)
 * <h4>Sending the output directly to Webalizer</h4>
 * <code>java&nbsp;LogSorter&nbsp;access.log&nbsp;|&nbsp;webalizer</code>
 * <h4>Sending the output to gzip:</h4>
 * <code>java&nbsp;LogSorter&nbsp;access.log&nbsp;|&nbsp;gzip&nbsp;>&nbsp;sorted_access.log.gz</code>
 * <h4>Sorting gzipped log files:</h4>
 * <code>gunzip&nbsp;-c&nbsp;access.log.gz&nbsp;|&nbsp;java&nbsp;LogSorter&nbsp;|&nbsp;gzip&nbsp;>&nbsp;sorted_access.log.gz</code>
 */
public class LogSorter {

	private static int LOOKAHEAD_POOL_SIZE = 8192;
	private static boolean REMOVE_DUPLICATES = false;
	
	private BufferedReader[] logs; 
	private LogEntry[] firstEntries;
	private int lookAhead;
	private static SimpleDateFormat logDateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss");
	TreeSet pool = new TreeSet();
	private LogEntry lastReturned;

	public LogSorter(InputStream[] logs){
		this (logs, LOOKAHEAD_POOL_SIZE);
	}
	
	public LogSorter(InputStream[] logs, int lookAhead){
		this.logs = new BufferedReader[logs.length];
		for (int i=0; i<logs.length; i++){
			this.logs[i] = new BufferedReader(new InputStreamReader(logs[i]));
		}
		this.lookAhead = lookAhead;
		firstEntries = new LogEntry[logs.length];
		for (int i=0; i<logs.length; i++){
			fillFirstEntry(i);	 
		}
		fillPool();
	} 
	
	public void fillFirstEntry(int i){
		boolean found = false;
		while (!found){
			try {
				String entry = this.logs[i].readLine();
				if (entry == null){
					firstEntries[i] = null;
				} else {				
					firstEntries[i] = new LogEntry(entry);
				}
				found = true;
			} catch (IOException x){
				System.err.println(x);
				firstEntries[i] = null;
				found = true;
			} catch (IllegalArgumentException x){
				System.err.println(x);
				firstEntries[i] = null;
			}
		}
	}
	
	public String next(){
		LogEntry ret;
		try {
			ret = (LogEntry)pool.first();
			pool.remove(ret);
			fillPool();
			if (lastReturned != null){
				if (ret.compareTo(lastReturned) < 0){
					System.err.println(
						"Logs may be out of order, a date of " + 
						lastReturned.date + 
						" was already printed but a date of " +
						ret.date +
						" was just found.  Try increasing the lookAhead pool size. (" +
						pool.size() +
						")"
					);
				} else {
					lastReturned = ret;
				}
			} else {
				lastReturned = ret;
			} 
			return ret.entry;
		} catch (NoSuchElementException x){
			return null;
		}	  
	}
	
	private LogEntry getLeastFirstEntry(){
		int least = -1; 	   
		for (int i=0; i<firstEntries.length; i++){
			if (firstEntries[i] != null){
				if (least==-1 || firstEntries[i].compareTo(firstEntries[least]) < 0){
					least = i;
				}
			}
		}
		if (least == -1){
			return null;
		}
		LogEntry ret = firstEntries[least];
		fillFirstEntry(least);
		return ret;
	}
	
	public static void main(String[] args){
		try {
			InputStream[] logs;
			if(args.length > 0){
				logs = new InputStream[args.length];
				for (int i=0; i<logs.length; i++){
					logs[i] = new FileInputStream(args[i]);
				}  
			} else {
				logs = new InputStream[]{
					System.in
				};
			}
			LogSorter sorter = new LogSorter(logs);
			String s;
			while ((s = sorter.next()) != null){
				System.out.println(s);
			}
		} catch (IOException x){
			System.err.println(x);
		}		   
	}
	
	private void fillPool(){
		LogEntry entry;
		while (pool.size() < lookAhead && (entry = getLeastFirstEntry()) != null){
			if (!pool.add(entry)){
				System.err.println("Duplicate log entry: " + entry);
			}
		}
	}
	
	private static long counter = 0;
	
	class LogEntry implements Comparable{		 
		private long count;
		public LogEntry (String s){
			entry = s;
			int startParse = s.indexOf("[");
			if (startParse == -1){
				throw new IllegalArgumentException("No date found in log entry: " + s);
			}
			ParsePosition p = new ParsePosition(startParse + 1);
			date = logDateFormat.parse(s, p);
			if (date == null){
				throw new IllegalArgumentException("No date found in log entry: " + s);
			}
			count = LogSorter.counter++;
		}
		public String entry;
		public Date date;
		public String toString(){
			return entry;
		}
		public int compareTo(Object o){
			LogEntry l = (LogEntry)o;
			int result = this.date.compareTo(l.date);
			if (result == 0){
				result = this.entry.compareTo(l.entry);
			} 
			if (result == 0 && !REMOVE_DUPLICATES){
				result = (int)(count - l.count);
			}
			return result;
		}
	}
}

