/* * Read files in comma separated value format with a fist line of labels. * * Copyright (C) 2004 Campbell, Allen T. <allenc28@yahoo.com> * * Copyright (C) 2004 Stephen Ostermiller * http://ostermiller.org/contact.pl?regarding=Java+Utilities * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * See COPYING.TXT for details. */ package com.Ostermiller.util; import java.io.IOException; import java.util.*; /** * Decorate a CSVParse object to provide an index of field names. Many (most?) * CSV files have a list of field names (labels) as the first line. A * LabeledCSVParser will consume this line automatically. The methods * {@link #getLabels()}, {@link #getLabelIndex(String)} and * {@link #getValueByLabel(String)} allow these labels to be discovered and * used while parsing CSV data. This class can also be used to conveniently * ignore field labels if they happen to be present in a CSV file and are not * desired. * * @author Campbell, Allen T. <allenc28@yahoo.com> * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities * @since ostermillerutils 1.03.00 */ public class LabeledCSVParser implements CSVParse { /** * Class which actually does the parsing. Called for most methods. * * @since ostermillerutils 1.03.00 */ private CSVParse parse; /** * The first line of the CSV file - treated specially as labels. * Set by setLabels. * * @since ostermillerutils 1.03.00 */ private String[] labels; /** * Hash of the labels (String) to column number (Integer). * Set by setLabels. * * @since ostermillerutils 1.03.00 */ private Map<String,Integer> labelMap; /** * The last line read from the CSV file. Saved for getValueByLabel(). * * @since ostermillerutils 1.03.00 */ private String[] lastLine; /** * Set whenever nextValue is called and checked when getValueByLabel() is * called to enforce incompatibility between the methods. * * @since ostermillerutils 1.03.00 */ private int nextValueLine = -2; /** * Construct a LabeledCSVParser on a CSVParse implementation. * * @param parse CSVParse implementation * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ @SuppressWarnings("unused") public LabeledCSVParser(CSVParse parse) throws IOException { this.parse = parse; } /** * Change this parser so that it uses a new delimiter. * <p> * The initial character is a comma, the delimiter cannot be changed * to a quote or other character that has special meaning in CSV. * * @param newDelim delimiter to which to switch. * @throws BadDelimiterException if the character cannot be used as a delimiter. * * @since ostermillerutils 1.03.00 */ public void changeDelimiter(char newDelim) throws BadDelimiterException { parse.changeDelimiter(newDelim); } /** * Change this parser so that it uses a new character for quoting. * <p> * The initial character is a double quote ("), the delimiter cannot be changed * to a comma or other character that has special meaning in CSV. * * @param newQuote character to use for quoting. * @throws BadQuoteException if the character cannot be used as a quote. * * @since ostermillerutils 1.03.00 */ public void changeQuote(char newQuote) throws BadQuoteException { parse.changeQuote(newQuote); } /** * Get all the values from the file. * <p> * If the file has already been partially read, only the * values that have not already been read will be included. * <p> * Each line of the file that has at least one value will be * represented. Comments and empty lines are ignored. * <p> * The resulting double array may be jagged. * <p> * The last line of the values is saved and may be accessed * by getValueByLabel(). * * @return all the values from the file or null if there are no more values. * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ public String[][] getAllValues() throws IOException { if (labels == null) setLabels(); String[][] allValues = parse.getAllValues(); if (allValues == null){ lastLine = null; } else { lastLine = allValues[allValues.length-1]; } return allValues; } /** * Get the line number that the last token came from. * <p> * New line breaks that occur in the middle of a token are not * counted in the line number count. * <p> * The first line of labels does not count towards the line number. * * @return line number or -1 if no tokens have been returned yet. * * @since ostermillerutils 1.03.00 */ public int getLastLineNumber(){ return lastLineNumber(); } /** * Get the line number that the last token came from. * <p> * New line breaks that occur in the middle of a token are not * counted in the line number count. * <p> * The first line of labels does not count towards the line number. * * @return line number or -1 if no tokens have been returned yet. * * @since ostermillerutils 1.03.00 */ public int lastLineNumber(){ int lineNum = parse.getLastLineNumber(); if (lineNum <= -1) return -1; // Nothing has been read yet if (lineNum == 1) return -1; // only labels have been read return lineNum - 1; // adjust line number to account for the label line } /** * Get all the values from a line. * <p> * If the line has already been partially read, only the values that have not * already been read will be included. * <p> * In addition to returning all the values from a line, LabeledCSVParser * maintains a buffer of the values. This feature allows * {@link #getValueByLabel(String)} to function. In this case * {@link #getLine()} is used simply to iterate CSV data. The iteration ends * when null is returned. * <p> * <b>Note:</b> The methods {@link #nextValue()} and {@link #getAllValues()} * are incompatible with {@link #getValueByLabel(String)} because the former * methods cause the offset of field values to shift and corrupt the internal * buffer maintained by {@link #getLine}. * * @return all the values from the line or null if there are no more values. * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ public String[] getLine() throws IOException { if (labels == null) setLabels(); lastLine = parse.getLine(); return lastLine; } /** * Read the next value from the file. The line number from * which this value was taken can be obtained from getLastLineNumber(). * <p> * This method is not compatible with getValueByLabel(). Using this * method will make getValueByLabel() throw an IllegalStateException * for the rest of the line. * * @return the next value or null if there are no more values. * @throws IOException if an error occurs while reading. * * @since ostermillerutils 1.03.00 */ public String nextValue() throws IOException { if (labels == null) setLabels(); String nextValue = parse.nextValue(); nextValueLine = getLastLineNumber(); return nextValue; } /** * Initialize the LabeledCSVParser.labels member and LabeledCSVParser.labelMap * member. * * @throws IOException if an IO error occurs * * @since ostermillerutils 1.03.00 */ private void setLabels() throws IOException { labels = parse.getLine(); if (labels == null) return; labelMap = new HashMap<String,Integer>(); for (int i = 0; i < labels.length; i++){ labelMap.put(labels[i], new Integer(i)); } } /** * Return an array of all field names from the top * of the CSV file. * * @return Field names. * @throws IOException if an IO error occurs * * @since ostermillerutils 1.03.00 */ public String[] getLabels() throws IOException { if (labels == null) setLabels(); return labels; } /** * Get the index of the column having the given label. * The {@link #getLine()} method returns an * array of field values for a single record of data. This method returns * the index of a member of that array based on the specified field name. * The first field has the index 0. * * @param label The field name. * @return The index of the field name, or -1 if the label does not exist. * @deprecated may swallow an IOException while reading the labels - please use getLabelIdx() * * @since ostermillerutils 1.03.00 */ @Deprecated public int getLabelIndex(String label){ try { return getLabelIdx(label); } catch (IOException iox){ return -1; } } /** * Get the index of the column having the given label. * The {@link #getLine()} method returns an * array of field values for a single record of data. This method returns * the index of a member of that array based on the specified field name. * The first field has the index 0. * * @param label The field name. * @return The index of the field name, or -1 if the label does not exist. * @throws IOException if an IO error occurs * * @since ostermillerutils 1.04.02 */ public int getLabelIdx(String label) throws IOException { if (labels == null) setLabels(); if (labelMap == null) return -1; if (!labelMap.containsKey(label)) return -1; return (labelMap.get(label)).intValue(); } /** * Given the label for the column, get the column from the last line that * was read. If the column cannot be found in the line, null is returned. * * @param label The field name. * @throws IllegalStateException if nextValue has been called as part of getting the last line. nextValue is not compatible with this method. * @return the value from the last line read or null if there is no such value * * @since ostermillerutils 1.03.00 */ public String getValueByLabel(String label) throws IllegalStateException { if (nextValueLine == getLastLineNumber()) throw new IllegalStateException("nextValue() was used to get values from this line."); if (lastLine == null) return null; int fieldIndex; try { fieldIndex = getLabelIdx(label); } catch (IOException iox){ // Can't happen here because the labels have been read before the first line. throw new RuntimeException(iox); } if (fieldIndex == -1) return null; if (fieldIndex >= lastLine.length) return null; return lastLine[fieldIndex]; } /** * Close any stream upon which this parser is based. * * @throws IOException if an error occurs while closing the stream. * * @since ostermillerutils 1.03.00 */ public void close() throws IOException { parse.close(); } }