SqlLoaderControlParserImpl.java

  1. /*
  2.  *
  3.  * The DbUnit Database Testing Framework
  4.  * Copyright (C)2002-2008, DbUnit.org
  5.  *
  6.  * This library is free software; you can redistribute it and/or
  7.  * modify it under the terms of the GNU Lesser General Public
  8.  * License as published by the Free Software Foundation; either
  9.  * version 2.1 of the License, or (at your option) any later version.
  10.  *
  11.  * This library is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14.  * Lesser General Public License for more details.
  15.  *
  16.  * You should have received a copy of the GNU Lesser General Public
  17.  * License along with this library; if not, write to the Free Software
  18.  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19.  *
  20.  */
  21. package org.dbunit.dataset.sqlloader;

  22. import java.io.File;
  23. import java.io.FileInputStream;
  24. import java.io.IOException;
  25. import java.io.InputStreamReader;
  26. import java.io.LineNumberReader;
  27. import java.net.URL;
  28. import java.nio.MappedByteBuffer;
  29. import java.nio.channels.FileChannel;
  30. import java.text.CharacterIterator;
  31. import java.text.StringCharacterIterator;
  32. import java.util.ArrayList;
  33. import java.util.List;
  34. import java.util.StringTokenizer;
  35. import java.util.regex.Matcher;
  36. import java.util.regex.Pattern;

  37. import org.dbunit.dataset.common.handlers.EscapeHandler;
  38. import org.dbunit.dataset.common.handlers.IllegalInputCharacterException;
  39. import org.dbunit.dataset.common.handlers.IsAlnumHandler;
  40. import org.dbunit.dataset.common.handlers.Pipeline;
  41. import org.dbunit.dataset.common.handlers.PipelineException;
  42. import org.dbunit.dataset.common.handlers.QuoteHandler;
  43. import org.dbunit.dataset.common.handlers.SeparatorHandler;
  44. import org.dbunit.dataset.common.handlers.TransparentHandler;
  45. import org.dbunit.dataset.common.handlers.WhitespacesHandler;
  46. import org.slf4j.Logger;
  47. import org.slf4j.LoggerFactory;

  48. /**
  49.  * Parser which parses Oracle SQLLoader files.
  50.  *
  51.  * @author Stephan Strittmatter (stritti AT users.sourceforge.net)
  52.  * @author Last changed by: $Author$
  53.  * @version $Revision$ $Date$
  54.  * @since 2.4.0
  55.  */
  56. public class SqlLoaderControlParserImpl implements SqlLoaderControlParser {

  57.     public static final char SEPARATOR_CHAR = ';';
  58.    
  59.     /** The pipeline. */
  60.     private Pipeline pipeline;

  61.     private String tableName;

  62. //    private String fieldTerminator;
  63. //
  64. //    private String fieldEnclosure;

  65.     private boolean hasTrailingNullCols;

  66.     /**
  67.      * Logger for this class
  68.      */
  69.     private static final Logger logger = LoggerFactory.getLogger(SqlLoaderControlParserImpl.class);

  70.    
  71.     /**
  72.      * The Constructor.
  73.      */
  74.     public SqlLoaderControlParserImpl() {

  75.         resetThePipeline();

  76.     }

  77.     /**
  78.      * Reset the pipeline.
  79.      */
  80.     private void resetThePipeline() {
  81.         logger.debug("resetThePipeline() - start");
  82.        
  83.         this.pipeline = new Pipeline();
  84.         this.pipeline.getPipelineConfig().setSeparatorChar(SEPARATOR_CHAR);
  85.        
  86.         //TODO add this.fieldEnclosure
  87.         getPipeline().putFront(SeparatorHandler.ENDPIECE());
  88.         getPipeline().putFront(EscapeHandler.ACCEPT());
  89.         getPipeline().putFront(IsAlnumHandler.QUOTE());
  90.         getPipeline().putFront(QuoteHandler.QUOTE());
  91.         getPipeline().putFront(EscapeHandler.ESCAPE());
  92.         getPipeline().putFront(WhitespacesHandler.IGNORE());
  93.         getPipeline().putFront(TransparentHandler.IGNORE());

  94.     }

  95.     /**
  96.      * Parse.
  97.      *
  98.      * @param csv the csv
  99.      *
  100.      * @return the list
  101.      *
  102.      * @throws IllegalInputCharacterException the illegal input character exception
  103.      * @throws PipelineException the pipeline exception
  104.      *
  105.      * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.lang.String)
  106.      */
  107.     public List parse(String csv) throws PipelineException, IllegalInputCharacterException {
  108.         logger.debug("parse(csv={}) - start", csv);

  109.         getPipeline().resetProducts();
  110.         CharacterIterator iterator = new StringCharacterIterator(csv);
  111.         for (char c = iterator.first(); c != CharacterIterator.DONE; c = iterator.next()) {
  112.             getPipeline().handle(c);
  113.         }
  114.         getPipeline().noMoreInput();
  115.         getPipeline().thePieceIsDone();

  116.         return getPipeline().getProducts();
  117.     }

  118.     /**
  119.      * Parse.
  120.      *
  121.      * @param url the URL
  122.      *
  123.      * @return the list
  124.      *
  125.      * @throws IOException the IO exception
  126.      * @throws SqlLoaderControlParserException the oracle control parser exception
  127.      *
  128.      * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.net.URL)
  129.      */
  130.     public List parse(URL url) throws IOException, SqlLoaderControlParserException {
  131.         logger.debug("parse(url={}) - start", url);
  132.         return parse(new File(url.toString()));
  133.     }

  134.     /**
  135.      * Parse.
  136.      *
  137.      * @param controlFile the source
  138.      *
  139.      * @return the list of column names as Strings
  140.      *
  141.      * @throws IOException the IO exception
  142.      * @throws SqlLoaderControlParserException the oracle control parser exception
  143.      * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.io.File)
  144.      */
  145.     public List parse(File controlFile)
  146.     throws IOException, SqlLoaderControlParserException
  147.     {
  148.         logger.debug("parse(controlFile={}) - start", controlFile);

  149.         FileInputStream fis = new FileInputStream(controlFile);

  150.         FileChannel fc = fis.getChannel();

  151.         MappedByteBuffer mbf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
  152.         byte[] barray = new byte[(int) (fc.size())];
  153.         mbf.get(barray);

  154.         String lines = new String(barray); //one big string

  155.         lines = lines.replaceAll("\r", ""); //unify to UNIX style to have easier regexp transformations.

  156.         if (parseForRegexp(lines, "(LOAD\\sDATA).*") != null) {

  157.             String fileName = parseForRegexp(lines, ".*INFILE\\s'(.*?)'.*");
  158.             File dataFile = resolveFile(controlFile.getParentFile(), fileName);

  159.             this.tableName = parseForRegexp(lines, ".*INTO\\sTABLE\\s(.*?)\\s.*");

  160. //            this.fieldTerminator = parseForRegexp(lines, ".*TERMINATED BY [\"|'](.*?)[\"|'].*");
  161. //
  162. //            this.fieldEnclosure = parseForRegexp(lines, ".*OPTIONALLY ENCLOSED BY '(.*?)'.*");

  163.             if (parseForRegexp(lines, ".*(TRAILING NULLCOLS).*") != "") {
  164.                 this.hasTrailingNullCols = true;
  165.             }
  166.             else {
  167.                 this.hasTrailingNullCols = false;
  168.             }
  169.            
  170.             List rows = new ArrayList();
  171.             List columnList = parseColumns(lines, rows);

  172.             LineNumberReader lineNumberReader =
  173.                 new LineNumberReader(new InputStreamReader(new FileInputStream(dataFile)));
  174.             try {
  175.                 parseTheData(columnList, lineNumberReader, rows);
  176.             }
  177.             finally {
  178.                 lineNumberReader.close();
  179.             }

  180.             return rows;
  181.         }
  182.         else {
  183.             throw new SqlLoaderControlParserException("Control file "
  184.                     + controlFile + " not starting using 'LOAD DATA'");
  185.         }
  186.     }

  187.     private File resolveFile(File parentDir, String fileName) {
  188.         // Initially assume that we have an absolute fileName
  189.         File dataFile = new File(fileName);
  190.        
  191.         // If fileName was not absolute build it using the given parent
  192.         if(!dataFile.isAbsolute()) {
  193.             fileName = fileName.replaceAll("\\\\", "/");
  194.             // remove "./" characters from name at the beginning if needed
  195.             if(fileName.startsWith("./")){
  196.                 fileName = fileName.substring(2);
  197.             }
  198.             // remove "." character from name at the beginning if needed
  199.             if(fileName.startsWith(".")){
  200.                 fileName = fileName.substring(1);
  201.             }
  202.             dataFile = new File(parentDir, fileName);
  203.         }
  204.         return dataFile;
  205.     }

  206.     protected String parseForRegexp(String controlFileContent, String regexp)
  207.     throws IOException
  208.     {
  209.         logger.debug("parseForRegexp(controlFileContent={}, regexp={}) - start", controlFileContent, regexp);

  210.         if (controlFileContent == null) {
  211.             throw new NullPointerException("control file has no content");
  212.         }

  213.         final Pattern pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
  214.         final Matcher matches = pattern.matcher(controlFileContent);

  215.         if (matches.find()) {
  216.             String inFileLine = matches.group(1);

  217.             return inFileLine;
  218.         }
  219.         else {
  220.             return null;
  221.         }
  222.     }

  223.     /**
  224.      * parse the first line of data from the given source.
  225.      *
  226.      * @param rows the rows
  227.      * @param lineNumberReader the line number reader
  228.      * @param controlFile the source
  229.      *
  230.      * @return the list of column names as Strings
  231.      *
  232.      * @throws IOException the IO exception
  233.      * @throws SqlLoaderControlParserException the oracle control parser exception
  234.      */
  235.     private List parseColumns(String controlFileContent, List rows) throws IOException,
  236.     SqlLoaderControlParserException
  237.     {
  238.         logger.debug("parseColumns(controlFileContent={}, rows={}) - start", controlFileContent, rows);

  239.         List columnList;

  240.         final Pattern pattern =
  241.             Pattern
  242.             .compile(".*FIELDS\\s.*\\(\\n(.*?)\\n\\)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
  243.         final Matcher matches = pattern.matcher(controlFileContent);

  244.         if (matches.find()) {
  245.             String columnFragment = matches.group(1);
  246.             //firstLine = firstLine.replaceAll("(\n|\r)", "");

  247.             columnList = new ArrayList();

  248.             columnFragment = columnFragment.replaceAll("\".*?\"", "");
  249.             columnFragment = columnFragment.replaceAll("\n", "");

  250.             StringTokenizer tok = new StringTokenizer(columnFragment, ",");

  251.             while (tok.hasMoreElements()) {

  252.                 String col = (String) tok.nextElement();
  253.                 col = parseForRegexp(col, ".*^([a-zA-Z0-9_]*)\\s").trim(); //column is the first part.
  254.                 columnList.add(col);
  255.             }

  256.             //columnsInFirstLine = parse(firstLine);
  257.             rows.add(columnList);
  258.         }

  259.         else {
  260.             columnList = null;
  261.         }

  262.         return columnList;
  263.     }

  264.     /**
  265.      * Parses the the data.
  266.      *
  267.      * @param rows the rows
  268.      * @param columnList the columns in first line
  269.      * @param lineNumberReader the line number reader
  270.      *
  271.      * @throws IOException the IO exception
  272.      * @throws SqlLoaderControlParserException the oracle control parser exception
  273.      */
  274.     private void parseTheData(final List columnList, LineNumberReader lineNumberReader, List rows)
  275.     throws IOException, SqlLoaderControlParserException
  276.     {
  277.         if(logger.isDebugEnabled())
  278.             logger.debug("parseTheData(columnList={}, lineNumberReader={}, rows={}) - start",
  279.                     new Object[] {columnList, lineNumberReader, rows} );

  280.         int nColumns = columnList.size();
  281.         List columns;
  282.         while ((columns = collectExpectedNumberOfColumns(nColumns, lineNumberReader)) != null) {
  283.             rows.add(columns);
  284.         }
  285.     }

  286.     /**
  287.      * Collect expected number of columns.
  288.      *
  289.      * @param expectedNumberOfColumns the expected number of columns
  290.      * @param lineNumberReader the line number reader
  291.      *
  292.      * @return the list
  293.      *
  294.      * @throws IOException the IO exception
  295.      * @throws SqlLoaderControlParserException the oracle control parser exception
  296.      */
  297.     private List collectExpectedNumberOfColumns(
  298.             int expectedNumberOfColumns,
  299.             LineNumberReader lineNumberReader) throws IOException, SqlLoaderControlParserException
  300.     {
  301.         if(logger.isDebugEnabled())
  302.             logger.debug("collectExpectedNumberOfColumns(expectedNumberOfColumns={}, lineNumberReader={}) - start",
  303.                 String.valueOf(expectedNumberOfColumns), lineNumberReader);

  304.         String anotherLine = lineNumberReader.readLine();
  305.         if (anotherLine == null) {
  306.             return null;
  307.         }

  308.         List columns = null;
  309.         int columnsCollectedSoFar = 0;
  310.         final StringBuilder buffer = new StringBuilder();
  311.         boolean shouldProceed = false;
  312.         while (columnsCollectedSoFar < expectedNumberOfColumns) {
  313.             try {
  314.                 buffer.append(anotherLine);
  315.                 columns = parse(buffer.toString());
  316.                 columnsCollectedSoFar = columns.size();
  317.             }
  318.             catch (IllegalStateException e) {
  319.                 resetThePipeline();
  320.                 anotherLine = lineNumberReader.readLine();
  321.                 if (anotherLine == null) {
  322.                     break;
  323.                 }
  324.                 buffer.append("\n");
  325.                 shouldProceed = true;
  326.             }
  327.             if (!shouldProceed) {
  328.                 break;
  329.             }
  330.         }
  331.        
  332.         if (columnsCollectedSoFar != expectedNumberOfColumns) {
  333.             if (this.hasTrailingNullCols) {
  334.                 columns.add(SqlLoaderControlProducer.NULL);
  335.             }
  336.             else {

  337.                 String message =
  338.                     new StringBuilder("Expected ")
  339.                         .append(expectedNumberOfColumns).append(" columns on line ")
  340.                         .append(lineNumberReader.getLineNumber()).append(", got ")
  341.                         .append(columnsCollectedSoFar).append(". Offending line: ").append(buffer)
  342.                         .toString();
  343.                 throw new SqlLoaderControlParserException(message);
  344.             }
  345.         }
  346.         return columns;
  347.     }

  348.     /**
  349.      * Gets the pipeline.
  350.      *
  351.      * @return the pipeline
  352.      */
  353.     Pipeline getPipeline()
  354.     {
  355.         return this.pipeline;
  356.     }

  357.     /**
  358.      * Sets the pipeline.
  359.      *
  360.      * @param pipeline the pipeline
  361.      */
  362.     void setPipeline(Pipeline pipeline)
  363.     {
  364.         this.pipeline = pipeline;
  365.     }

  366.     public String getTableName()
  367.     {
  368.         return this.tableName;
  369.     }
  370. }