/*
* Read files in comma separated value format with a first line of labels.
*
* Copyright (C) 2004 Campbell, Allen T. <allenc28@yahoo.com>
*
* Copyright (C) 2004-2010 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Java+Utilities
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See LICENSE.txt for details.
*/
package com.Ostermiller.util;
import java.io.IOException;
import java.util.*;
/**
* Decorate a CSVParse object to provide an index of field names. Many (most?)
* CSV files have a list of field names (labels) as the first line. A
* LabeledCSVParser will consume this line automatically. The methods
* {@link #getLabels()}, {@link #getLabelIndex(String)} and
* {@link #getValueByLabel(String)} allow these labels to be discovered and
* used while parsing CSV data. This class can also be used to conveniently
* ignore field labels if they happen to be present in a CSV file and are not
* desired.
*
* @author Campbell, Allen T. <allenc28@yahoo.com>
* @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
* @since ostermillerutils 1.03.00
*/
public class LabeledCSVParser implements CSVParse {

    /**
     * Class which actually does the parsing. Almost every method of this
     * decorator delegates to it.
     *
     * @since ostermillerutils 1.03.00
     */
    private final CSVParse parse;

    /**
     * The first line of the CSV file - treated specially as labels.
     * Set by setLabels; stays null until the labels have been read (or if
     * the underlying parser had no line to return).
     *
     * @since ostermillerutils 1.03.00
     */
    private String[] labels;

    /**
     * Hash of the labels (String) to column number (Integer).
     * Set by setLabels.
     *
     * @since ostermillerutils 1.03.00
     */
    private Map<String,Integer> labelMap;

    /**
     * The last line read from the CSV file. Saved for getValueByLabel().
     *
     * @since ostermillerutils 1.03.00
     */
    private String[] lastLine;

    /**
     * Line number recorded whenever nextValue is called and checked when
     * getValueByLabel() is called to enforce incompatibility between the
     * methods. Starts at -2, a value lastLineNumber() never returns, so the
     * check cannot trigger before nextValue() has been used.
     *
     * @since ostermillerutils 1.03.00
     */
    private int nextValueLine = -2;

    /**
     * Construct a LabeledCSVParser on a CSVParse implementation.
     * <p>
     * Nothing is read here; the label line is consumed lazily by the first
     * method that needs it.
     *
     * @param parse CSVParse implementation
     * @throws IOException if an error occurs while reading.
     *
     * @since ostermillerutils 1.03.00
     */
    public LabeledCSVParser(CSVParse parse) throws IOException {
        this.parse = parse;
    }

    /**
     * Change this parser so that it uses a new delimiter.
     * <p>
     * The initial character is a comma, the delimiter cannot be changed
     * to a quote or other character that has special meaning in CSV.
     *
     * @param newDelim delimiter to which to switch.
     * @throws BadDelimiterException if the character cannot be used as a delimiter.
     *
     * @since ostermillerutils 1.03.00
     */
    public void changeDelimiter(char newDelim) throws BadDelimiterException {
        parse.changeDelimiter(newDelim);
    }

    /**
     * Change this parser so that it uses a new character for quoting.
     * <p>
     * The initial character is a double quote ("), the quote character cannot
     * be changed to a comma or other character that has special meaning in CSV.
     *
     * @param newQuote character to use for quoting.
     * @throws BadQuoteException if the character cannot be used as a quote.
     *
     * @since ostermillerutils 1.03.00
     */
    public void changeQuote(char newQuote) throws BadQuoteException {
        parse.changeQuote(newQuote);
    }

    /**
     * Get all the values from the file.
     * <p>
     * If the file has already been partially read, only the
     * values that have not already been read will be included.
     * <p>
     * Each line of the file that has at least one value will be
     * represented. Comments and empty lines are ignored.
     * <p>
     * The resulting double array may be jagged.
     * <p>
     * The last line of the values is saved and may be accessed
     * by getValueByLabel(). If the underlying parser returns null or an
     * empty array, no last line is recorded.
     *
     * @return all the values from the file or null if there are no more values.
     * @throws IOException if an error occurs while reading.
     *
     * @since ostermillerutils 1.03.00
     */
    public String[][] getAllValues() throws IOException {
        if (labels == null) {
            setLabels();
        }
        String[][] allValues = parse.getAllValues();
        // Guard the empty case as well as null: indexing length-1 on an empty
        // array would throw ArrayIndexOutOfBoundsException.
        if (allValues == null || allValues.length == 0) {
            lastLine = null;
        } else {
            lastLine = allValues[allValues.length - 1];
        }
        return allValues;
    }

    /**
     * Get the line number that the last token came from.
     * <p>
     * New line breaks that occur in the middle of a token are not
     * counted in the line number count.
     * <p>
     * The first line of labels does not count towards the line number.
     *
     * @return line number or -1 if no tokens have been returned yet.
     *
     * @since ostermillerutils 1.03.00
     */
    public int getLastLineNumber() {
        return lastLineNumber();
    }

    /**
     * Get the line number that the last token came from.
     * <p>
     * New line breaks that occur in the middle of a token are not
     * counted in the line number count.
     * <p>
     * The first line of labels does not count towards the line number.
     *
     * @return line number or -1 if no tokens have been returned yet.
     *
     * @since ostermillerutils 1.03.00
     */
    public int lastLineNumber() {
        int lineNum = parse.getLastLineNumber();
        // Anything up to and including 1 means that either nothing has been
        // read yet or only the label line has been read.
        if (lineNum <= 1) {
            return -1;
        }
        // Adjust the line number to account for the label line.
        return lineNum - 1;
    }

    /**
     * Get all the values from a line.
     * <p>
     * If the line has already been partially read, only the values that have not
     * already been read will be included.
     * <p>
     * In addition to returning all the values from a line, LabeledCSVParser
     * keeps a reference to the returned values. This feature allows
     * {@link #getValueByLabel(String)} to function. In this case
     * {@link #getLine()} is used simply to iterate CSV data. The iteration ends
     * when null is returned.
     * <p>
     * <b>Note:</b> {@link #nextValue()} is incompatible with
     * {@link #getValueByLabel(String)} because it consumes single values and
     * so shifts the offset of the field values relative to the buffered line.
     * {@link #getAllValues()} replaces the buffered line with the last line
     * it read.
     *
     * @return all the values from the line or null if there are no more values.
     * @throws IOException if an error occurs while reading.
     *
     * @since ostermillerutils 1.03.00
     */
    public String[] getLine() throws IOException {
        if (labels == null) {
            setLabels();
        }
        lastLine = parse.getLine();
        return lastLine;
    }

    /**
     * Read the next value from the file. The line number from
     * which this value was taken can be obtained from getLastLineNumber().
     * <p>
     * This method is not compatible with getValueByLabel(). Using this
     * method will make getValueByLabel() throw an IllegalStateException
     * for the rest of the line.
     *
     * @return the next value or null if there are no more values.
     * @throws IOException if an error occurs while reading.
     *
     * @since ostermillerutils 1.03.00
     */
    public String nextValue() throws IOException {
        if (labels == null) {
            setLabels();
        }
        String value = parse.nextValue();
        // Remember which line was touched so getValueByLabel() can refuse it.
        nextValueLine = getLastLineNumber();
        return value;
    }

    /**
     * Initialize the LabeledCSVParser.labels member and LabeledCSVParser.labelMap
     * member by reading one line from the underlying parser.
     * <p>
     * If the underlying parser has no line to return, both members are left
     * untouched. When the same label occurs more than once, the index of its
     * last occurrence is the one stored in the map.
     *
     * @throws IOException if an IO error occurs
     *
     * @since ostermillerutils 1.03.00
     */
    private void setLabels() throws IOException {
        labels = parse.getLine();
        if (labels == null) {
            return;
        }
        labelMap = new HashMap<String,Integer>();
        for (int i = 0; i < labels.length; i++) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            labelMap.put(labels[i], Integer.valueOf(i));
        }
    }

    /**
     * Return an array of all field names from the top
     * of the CSV file.
     * <p>
     * The returned array is the parser's own label array, not a copy.
     *
     * @return Field names, or null if no label line could be read.
     * @throws IOException if an IO error occurs
     *
     * @since ostermillerutils 1.03.00
     */
    public String[] getLabels() throws IOException {
        if (labels == null) {
            setLabels();
        }
        return labels;
    }

    /**
     * Get the index of the column having the given label.
     * The {@link #getLine()} method returns an
     * array of field values for a single record of data. This method returns
     * the index of a member of that array based on the specified field name.
     * The first field has the index 0.
     *
     * @param label The field name.
     * @return The index of the field name, or -1 if the label does not exist
     *         or an IOException occurred while reading the labels.
     * @deprecated may swallow an IOException while reading the labels - please use getLabelIdx()
     *
     * @since ostermillerutils 1.03.00
     */
    @Deprecated public int getLabelIndex(String label) {
        try {
            return getLabelIdx(label);
        } catch (IOException ignored) {
            // Deliberate legacy behavior: this signature cannot propagate the
            // checked exception, so a read failure is reported as "not found".
            return -1;
        }
    }

    /**
     * Get the index of the column having the given label.
     * The {@link #getLine()} method returns an
     * array of field values for a single record of data. This method returns
     * the index of a member of that array based on the specified field name.
     * The first field has the index 0.
     *
     * @param label The field name.
     * @return The index of the field name, or -1 if the label does not exist.
     * @throws IOException if an IO error occurs
     *
     * @since ostermillerutils 1.04.02
     */
    public int getLabelIdx(String label) throws IOException {
        if (labels == null) {
            setLabels();
        }
        if (labelMap == null) {
            return -1;
        }
        // The map never holds null values, so a null result means "absent".
        Integer index = labelMap.get(label);
        return (index == null) ? -1 : index.intValue();
    }

    /**
     * Given the label for the column, get the column from the last line that
     * was read. If the column cannot be found in the line, null is returned.
     *
     * @param label The field name.
     * @throws IllegalStateException if nextValue has been called as part of getting the last line. nextValue is not compatible with this method.
     * @return the value from the last line read or null if there is no such value
     *
     * @since ostermillerutils 1.03.00
     */
    public String getValueByLabel(String label) throws IllegalStateException {
        if (nextValueLine == getLastLineNumber()) {
            throw new IllegalStateException("nextValue() was used to get values from this line.");
        }
        if (lastLine == null) {
            return null;
        }
        int fieldIndex;
        try {
            fieldIndex = getLabelIdx(label);
        } catch (IOException iox) {
            // Can't happen here because the labels have been read before the first line.
            throw new RuntimeException(iox);
        }
        // Unknown label, or a short line that has no value in that column.
        if (fieldIndex == -1 || fieldIndex >= lastLine.length) {
            return null;
        }
        return lastLine[fieldIndex];
    }

    /**
     * Close any stream upon which this parser is based.
     *
     * @throws IOException if an error occurs while closing the stream.
     *
     * @since ostermillerutils 1.03.00
     */
    public void close() throws IOException {
        parse.close();
    }
}