SqlLoaderControlParserImpl.java

/*
 *
 * The DbUnit Database Testing Framework
 * Copyright (C)2002-2008, DbUnit.org
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
package org.dbunit.dataset.sqlloader;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.net.URL;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.dbunit.dataset.common.handlers.EscapeHandler;
import org.dbunit.dataset.common.handlers.IllegalInputCharacterException;
import org.dbunit.dataset.common.handlers.IsAlnumHandler;
import org.dbunit.dataset.common.handlers.Pipeline;
import org.dbunit.dataset.common.handlers.PipelineException;
import org.dbunit.dataset.common.handlers.QuoteHandler;
import org.dbunit.dataset.common.handlers.SeparatorHandler;
import org.dbunit.dataset.common.handlers.TransparentHandler;
import org.dbunit.dataset.common.handlers.WhitespacesHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Parser which parses Oracle SQLLoader files.
 * 
 * @author Stephan Strittmatter (stritti AT users.sourceforge.net)
 * @author Last changed by: $Author$
 * @version $Revision$ $Date$
 * @since 2.4.0
 */
public class SqlLoaderControlParserImpl implements SqlLoaderControlParser {

    public static final char SEPARATOR_CHAR = ';';
    
    /** The pipeline. */
    private Pipeline pipeline;

    private String tableName;

//    private String fieldTerminator;
//
//    private String fieldEnclosure;

    private boolean hasTrailingNullCols;

    /**
     * Logger for this class
     */
    private static final Logger logger = LoggerFactory.getLogger(SqlLoaderControlParserImpl.class);

    
    /**
     * The Constructor.
     */
    public SqlLoaderControlParserImpl() {

        resetThePipeline();

    }

    /**
     * Reset the pipeline.
     */
    private void resetThePipeline() {
        logger.debug("resetThePipeline() - start");
        
        this.pipeline = new Pipeline();
        this.pipeline.getPipelineConfig().setSeparatorChar(SEPARATOR_CHAR);
        
        //TODO add this.fieldEnclosure
        getPipeline().putFront(SeparatorHandler.ENDPIECE());
        getPipeline().putFront(EscapeHandler.ACCEPT());
        getPipeline().putFront(IsAlnumHandler.QUOTE());
        getPipeline().putFront(QuoteHandler.QUOTE());
        getPipeline().putFront(EscapeHandler.ESCAPE());
        getPipeline().putFront(WhitespacesHandler.IGNORE());
        getPipeline().putFront(TransparentHandler.IGNORE());

    }

    /**
     * Parse.
     * 
     * @param csv the csv
     * 
     * @return the list
     * 
     * @throws IllegalInputCharacterException the illegal input character exception
     * @throws PipelineException the pipeline exception
     * 
     * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.lang.String)
     */
    public List parse(String csv) throws PipelineException, IllegalInputCharacterException {
        logger.debug("parse(csv={}) - start", csv);

        getPipeline().resetProducts();
        CharacterIterator iterator = new StringCharacterIterator(csv);
        for (char c = iterator.first(); c != CharacterIterator.DONE; c = iterator.next()) {
            getPipeline().handle(c);
        }
        getPipeline().noMoreInput();
        getPipeline().thePieceIsDone();

        return getPipeline().getProducts();
    }

    /**
     * Parse.
     * 
     * @param url the URL
     * 
     * @return the list
     * 
     * @throws IOException the IO exception
     * @throws SqlLoaderControlParserException the oracle control parser exception
     * 
     * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.net.URL)
     */
    public List parse(URL url) throws IOException, SqlLoaderControlParserException {
        logger.debug("parse(url={}) - start", url);
        return parse(new File(url.toString()));
    }

    /**
     * Parse.
     * 
     * @param controlFile the source
     * 
     * @return the list of column names as Strings
     * 
     * @throws IOException the IO exception
     * @throws SqlLoaderControlParserException the oracle control parser exception
     * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.io.File)
     */
    public List parse(File controlFile) 
    throws IOException, SqlLoaderControlParserException 
    {
        logger.debug("parse(controlFile={}) - start", controlFile);

        FileInputStream fis = new FileInputStream(controlFile);

        FileChannel fc = fis.getChannel();

        MappedByteBuffer mbf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
        byte[] barray = new byte[(int) (fc.size())];
        mbf.get(barray);

        String lines = new String(barray); //one big string

        lines = lines.replaceAll("\r", ""); //unify to UNIX style to have easier regexp transformations.

        if (parseForRegexp(lines, "(LOAD\\sDATA).*") != null) {

        	String fileName = parseForRegexp(lines, ".*INFILE\\s'(.*?)'.*");
        	File dataFile = resolveFile(controlFile.getParentFile(), fileName);

            this.tableName = parseForRegexp(lines, ".*INTO\\sTABLE\\s(.*?)\\s.*");

//            this.fieldTerminator = parseForRegexp(lines, ".*TERMINATED BY [\"|'](.*?)[\"|'].*");
//
//            this.fieldEnclosure = parseForRegexp(lines, ".*OPTIONALLY ENCLOSED BY '(.*?)'.*");

            if (parseForRegexp(lines, ".*(TRAILING NULLCOLS).*") != "") {
                this.hasTrailingNullCols = true;
            }
            else {
                this.hasTrailingNullCols = false;
            }
            
            List rows = new ArrayList();
            List columnList = parseColumns(lines, rows);

            LineNumberReader lineNumberReader =
                new LineNumberReader(new InputStreamReader(new FileInputStream(dataFile)));
            try {
                parseTheData(columnList, lineNumberReader, rows);
            }
            finally {
                lineNumberReader.close();
            }

            return rows;
        }
        else {
            throw new SqlLoaderControlParserException("Control file "
                    + controlFile + " not starting using 'LOAD DATA'");
        }
    }

    private File resolveFile(File parentDir, String fileName) {
    	// Initially assume that we have an absolute fileName
    	File dataFile = new File(fileName);
    	
    	// If fileName was not absolute build it using the given parent
    	if(!dataFile.isAbsolute()) {
    		fileName = fileName.replaceAll("\\\\", "/");
    		// remove "./" characters from name at the beginning if needed
    		if(fileName.startsWith("./")){
    			fileName = fileName.substring(2);
    		}
    		// remove "." character from name at the beginning if needed
    		if(fileName.startsWith(".")){
    			fileName = fileName.substring(1);
    		}
    		dataFile = new File(parentDir, fileName);
    	}
    	return dataFile;
	}

	protected String parseForRegexp(String controlFileContent, String regexp) 
    throws IOException 
    {
        logger.debug("parseForRegexp(controlFileContent={}, regexp={}) - start", controlFileContent, regexp);

        if (controlFileContent == null) {
            throw new NullPointerException("control file has no content");
        }

        final Pattern pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
        final Matcher matches = pattern.matcher(controlFileContent);

        if (matches.find()) {
            String inFileLine = matches.group(1);

            return inFileLine;
        }
        else {
            return null;
        }
    }

    /**
     * parse the first line of data from the given source.
     * 
     * @param rows the rows
     * @param lineNumberReader the line number reader
     * @param controlFile the source
     * 
     * @return the list of column names as Strings
     * 
     * @throws IOException the IO exception
     * @throws SqlLoaderControlParserException the oracle control parser exception
     */
    private List parseColumns(String controlFileContent, List rows) throws IOException,
    SqlLoaderControlParserException 
    {
        logger.debug("parseColumns(controlFileContent={}, rows={}) - start", controlFileContent, rows);

        List columnList;

        final Pattern pattern =
            Pattern
            .compile(".*FIELDS\\s.*\\(\\n(.*?)\\n\\)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
        final Matcher matches = pattern.matcher(controlFileContent);

        if (matches.find()) {
            String columnFragment = matches.group(1);
            //firstLine = firstLine.replaceAll("(\n|\r)", "");

            columnList = new ArrayList();

            columnFragment = columnFragment.replaceAll("\".*?\"", "");
            columnFragment = columnFragment.replaceAll("\n", "");

            StringTokenizer tok = new StringTokenizer(columnFragment, ",");

            while (tok.hasMoreElements()) {

                String col = (String) tok.nextElement();
                col = parseForRegexp(col, ".*^([a-zA-Z0-9_]*)\\s").trim(); //column is the first part.
                columnList.add(col);
            }

            //columnsInFirstLine = parse(firstLine);
            rows.add(columnList);
        }

        else {
            columnList = null;
        }

        return columnList;
    }

    /**
     * Parses the the data.
     * 
     * @param rows the rows
     * @param columnList the columns in first line
     * @param lineNumberReader the line number reader
     * 
     * @throws IOException the IO exception
     * @throws SqlLoaderControlParserException the oracle control parser exception
     */
    private void parseTheData(final List columnList, LineNumberReader lineNumberReader, List rows)
    throws IOException, SqlLoaderControlParserException 
    {
        if(logger.isDebugEnabled())
            logger.debug("parseTheData(columnList={}, lineNumberReader={}, rows={}) - start", 
                    new Object[] {columnList, lineNumberReader, rows} );

        int nColumns = columnList.size();
        List columns;
        while ((columns = collectExpectedNumberOfColumns(nColumns, lineNumberReader)) != null) {
            rows.add(columns);
        }
    }

    /**
     * Collect expected number of columns.
     * 
     * @param expectedNumberOfColumns the expected number of columns
     * @param lineNumberReader the line number reader
     * 
     * @return the list
     * 
     * @throws IOException the IO exception
     * @throws SqlLoaderControlParserException the oracle control parser exception
     */
    private List collectExpectedNumberOfColumns(
            int expectedNumberOfColumns,
            LineNumberReader lineNumberReader) throws IOException, SqlLoaderControlParserException 
    {
        if(logger.isDebugEnabled())
            logger.debug("collectExpectedNumberOfColumns(expectedNumberOfColumns={}, lineNumberReader={}) - start", 
                String.valueOf(expectedNumberOfColumns), lineNumberReader);

        String anotherLine = lineNumberReader.readLine();
        if (anotherLine == null) {
            return null;
        }

        List columns = null;
        int columnsCollectedSoFar = 0;
        StringBuffer buffer = new StringBuffer();
        boolean shouldProceed = false;
        while (columnsCollectedSoFar < expectedNumberOfColumns) {
            try {
                buffer.append(anotherLine);
                columns = parse(buffer.toString());
                columnsCollectedSoFar = columns.size();
            }
            catch (IllegalStateException e) {
                resetThePipeline();
                anotherLine = lineNumberReader.readLine();
                if (anotherLine == null) {
                    break;
                }
                buffer.append("\n");
                shouldProceed = true;
            }
            if (!shouldProceed) {
                break;
            }
        }
        
        if (columnsCollectedSoFar != expectedNumberOfColumns) {
            if (this.hasTrailingNullCols) {
                columns.add(SqlLoaderControlProducer.NULL);
            }
            else {

                String message =
                    new StringBuffer("Expected ")
                        .append(expectedNumberOfColumns).append(" columns on line ")
                        .append(lineNumberReader.getLineNumber()).append(", got ")
                        .append(columnsCollectedSoFar).append(". Offending line: ").append(buffer)
                        .toString();
                throw new SqlLoaderControlParserException(message);
            }
        }
        return columns;
    }

    /**
     * Gets the pipeline.
     * 
     * @return the pipeline
     */
    Pipeline getPipeline() 
    {
        return this.pipeline;
    }

    /**
     * Sets the pipeline.
     * 
     * @param pipeline the pipeline
     */
    void setPipeline(Pipeline pipeline) 
    {
        this.pipeline = pipeline;
    }

    public String getTableName() 
    {
        return this.tableName;
    }
}