/*
 *
 * The DbUnit Database Testing Framework
 * Copyright (C)2002-2008, DbUnit.org
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
package org.dbunit.dataset.sqlloader;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.dbunit.dataset.common.handlers.EscapeHandler;
import org.dbunit.dataset.common.handlers.IllegalInputCharacterException;
import org.dbunit.dataset.common.handlers.IsAlnumHandler;
import org.dbunit.dataset.common.handlers.Pipeline;
import org.dbunit.dataset.common.handlers.PipelineException;
import org.dbunit.dataset.common.handlers.QuoteHandler;
import org.dbunit.dataset.common.handlers.SeparatorHandler;
import org.dbunit.dataset.common.handlers.TransparentHandler;
import org.dbunit.dataset.common.handlers.WhitespacesHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
50  
51  /**
52   * Parser which parses Oracle SQLLoader files.
53   * 
54   * @author Stephan Strittmatter (stritti AT users.sourceforge.net)
55   * @author Last changed by: $Author$
56   * @version $Revision$ $Date$
57   * @since 2.4.0
58   */
59  public class SqlLoaderControlParserImpl implements SqlLoaderControlParser {
60  
61      public static final char SEPARATOR_CHAR = ';';
62      
63      /** The pipeline. */
64      private Pipeline pipeline;
65  
66      private String tableName;
67  
68  //    private String fieldTerminator;
69  //
70  //    private String fieldEnclosure;
71  
72      private boolean hasTrailingNullCols;
73  
74      /**
75       * Logger for this class
76       */
77      private static final Logger logger = LoggerFactory.getLogger(SqlLoaderControlParserImpl.class);
78  
79      
80      /**
81       * The Constructor.
82       */
83      public SqlLoaderControlParserImpl() {
84  
85          resetThePipeline();
86  
87      }
88  
89      /**
90       * Reset the pipeline.
91       */
92      private void resetThePipeline() {
93          logger.debug("resetThePipeline() - start");
94          
95          this.pipeline = new Pipeline();
96          this.pipeline.getPipelineConfig().setSeparatorChar(SEPARATOR_CHAR);
97          
98          //TODO add this.fieldEnclosure
99          getPipeline().putFront(SeparatorHandler.ENDPIECE());
100         getPipeline().putFront(EscapeHandler.ACCEPT());
101         getPipeline().putFront(IsAlnumHandler.QUOTE());
102         getPipeline().putFront(QuoteHandler.QUOTE());
103         getPipeline().putFront(EscapeHandler.ESCAPE());
104         getPipeline().putFront(WhitespacesHandler.IGNORE());
105         getPipeline().putFront(TransparentHandler.IGNORE());
106 
107     }
108 
109     /**
110      * Parse.
111      * 
112      * @param csv the csv
113      * 
114      * @return the list
115      * 
116      * @throws IllegalInputCharacterException the illegal input character exception
117      * @throws PipelineException the pipeline exception
118      * 
119      * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.lang.String)
120      */
121     public List parse(String csv) throws PipelineException, IllegalInputCharacterException {
122         logger.debug("parse(csv={}) - start", csv);
123 
124         getPipeline().resetProducts();
125         CharacterIterator iterator = new StringCharacterIterator(csv);
126         for (char c = iterator.first(); c != CharacterIterator.DONE; c = iterator.next()) {
127             getPipeline().handle(c);
128         }
129         getPipeline().noMoreInput();
130         getPipeline().thePieceIsDone();
131 
132         return getPipeline().getProducts();
133     }
134 
135     /**
136      * Parse.
137      * 
138      * @param url the URL
139      * 
140      * @return the list
141      * 
142      * @throws IOException the IO exception
143      * @throws SqlLoaderControlParserException the oracle control parser exception
144      * 
145      * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.net.URL)
146      */
147     public List parse(URL url) throws IOException, SqlLoaderControlParserException {
148         logger.debug("parse(url={}) - start", url);
149         return parse(new File(url.toString()));
150     }
151 
152     /**
153      * Parse.
154      * 
155      * @param controlFile the source
156      * 
157      * @return the list of column names as Strings
158      * 
159      * @throws IOException the IO exception
160      * @throws SqlLoaderControlParserException the oracle control parser exception
161      * @see org.dbunit.dataset.sqlloader.SqlLoaderControlParser#parse(java.io.File)
162      */
163     public List parse(File controlFile) 
164     throws IOException, SqlLoaderControlParserException 
165     {
166         logger.debug("parse(controlFile={}) - start", controlFile);
167 
168         FileInputStream fis = new FileInputStream(controlFile);
169 
170         FileChannel fc = fis.getChannel();
171 
172         MappedByteBuffer mbf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
173         byte[] barray = new byte[(int) (fc.size())];
174         mbf.get(barray);
175 
176         String lines = new String(barray); //one big string
177 
178         lines = lines.replaceAll("\r", ""); //unify to UNIX style to have easier regexp transformations.
179 
180         if (parseForRegexp(lines, "(LOAD\\sDATA).*") != null) {
181 
182         	String fileName = parseForRegexp(lines, ".*INFILE\\s'(.*?)'.*");
183         	File dataFile = resolveFile(controlFile.getParentFile(), fileName);
184 
185             this.tableName = parseForRegexp(lines, ".*INTO\\sTABLE\\s(.*?)\\s.*");
186 
187 //            this.fieldTerminator = parseForRegexp(lines, ".*TERMINATED BY [\"|'](.*?)[\"|'].*");
188 //
189 //            this.fieldEnclosure = parseForRegexp(lines, ".*OPTIONALLY ENCLOSED BY '(.*?)'.*");
190 
191             if (parseForRegexp(lines, ".*(TRAILING NULLCOLS).*") != "") {
192                 this.hasTrailingNullCols = true;
193             }
194             else {
195                 this.hasTrailingNullCols = false;
196             }
197             
198             List rows = new ArrayList();
199             List columnList = parseColumns(lines, rows);
200 
201             LineNumberReader lineNumberReader =
202                 new LineNumberReader(new InputStreamReader(new FileInputStream(dataFile)));
203             try {
204                 parseTheData(columnList, lineNumberReader, rows);
205             }
206             finally {
207                 lineNumberReader.close();
208             }
209 
210             return rows;
211         }
212         else {
213             throw new SqlLoaderControlParserException("Control file "
214                     + controlFile + " not starting using 'LOAD DATA'");
215         }
216     }
217 
218     private File resolveFile(File parentDir, String fileName) {
219     	// Initially assume that we have an absolute fileName
220     	File dataFile = new File(fileName);
221     	
222     	// If fileName was not absolute build it using the given parent
223     	if(!dataFile.isAbsolute()) {
224     		fileName = fileName.replaceAll("\\\\", "/");
225     		// remove "./" characters from name at the beginning if needed
226     		if(fileName.startsWith("./")){
227     			fileName = fileName.substring(2);
228     		}
229     		// remove "." character from name at the beginning if needed
230     		if(fileName.startsWith(".")){
231     			fileName = fileName.substring(1);
232     		}
233     		dataFile = new File(parentDir, fileName);
234     	}
235     	return dataFile;
236 	}
237 
238 	protected String parseForRegexp(String controlFileContent, String regexp) 
239     throws IOException 
240     {
241         logger.debug("parseForRegexp(controlFileContent={}, regexp={}) - start", controlFileContent, regexp);
242 
243         if (controlFileContent == null) {
244             throw new NullPointerException("control file has no content");
245         }
246 
247         final Pattern pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
248         final Matcher matches = pattern.matcher(controlFileContent);
249 
250         if (matches.find()) {
251             String inFileLine = matches.group(1);
252 
253             return inFileLine;
254         }
255         else {
256             return null;
257         }
258     }
259 
260     /**
261      * parse the first line of data from the given source.
262      * 
263      * @param rows the rows
264      * @param lineNumberReader the line number reader
265      * @param controlFile the source
266      * 
267      * @return the list of column names as Strings
268      * 
269      * @throws IOException the IO exception
270      * @throws SqlLoaderControlParserException the oracle control parser exception
271      */
272     private List parseColumns(String controlFileContent, List rows) throws IOException,
273     SqlLoaderControlParserException 
274     {
275         logger.debug("parseColumns(controlFileContent={}, rows={}) - start", controlFileContent, rows);
276 
277         List columnList;
278 
279         final Pattern pattern =
280             Pattern
281             .compile(".*FIELDS\\s.*\\(\\n(.*?)\\n\\)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
282         final Matcher matches = pattern.matcher(controlFileContent);
283 
284         if (matches.find()) {
285             String columnFragment = matches.group(1);
286             //firstLine = firstLine.replaceAll("(\n|\r)", "");
287 
288             columnList = new ArrayList();
289 
290             columnFragment = columnFragment.replaceAll("\".*?\"", "");
291             columnFragment = columnFragment.replaceAll("\n", "");
292 
293             StringTokenizer tok = new StringTokenizer(columnFragment, ",");
294 
295             while (tok.hasMoreElements()) {
296 
297                 String col = (String) tok.nextElement();
298                 col = parseForRegexp(col, ".*^([a-zA-Z0-9_]*)\\s").trim(); //column is the first part.
299                 columnList.add(col);
300             }
301 
302             //columnsInFirstLine = parse(firstLine);
303             rows.add(columnList);
304         }
305 
306         else {
307             columnList = null;
308         }
309 
310         return columnList;
311     }
312 
313     /**
314      * Parses the the data.
315      * 
316      * @param rows the rows
317      * @param columnList the columns in first line
318      * @param lineNumberReader the line number reader
319      * 
320      * @throws IOException the IO exception
321      * @throws SqlLoaderControlParserException the oracle control parser exception
322      */
323     private void parseTheData(final List columnList, LineNumberReader lineNumberReader, List rows)
324     throws IOException, SqlLoaderControlParserException 
325     {
326         if(logger.isDebugEnabled())
327             logger.debug("parseTheData(columnList={}, lineNumberReader={}, rows={}) - start", 
328                     new Object[] {columnList, lineNumberReader, rows} );
329 
330         int nColumns = columnList.size();
331         List columns;
332         while ((columns = collectExpectedNumberOfColumns(nColumns, lineNumberReader)) != null) {
333             rows.add(columns);
334         }
335     }
336 
337     /**
338      * Collect expected number of columns.
339      * 
340      * @param expectedNumberOfColumns the expected number of columns
341      * @param lineNumberReader the line number reader
342      * 
343      * @return the list
344      * 
345      * @throws IOException the IO exception
346      * @throws SqlLoaderControlParserException the oracle control parser exception
347      */
348     private List collectExpectedNumberOfColumns(
349             int expectedNumberOfColumns,
350             LineNumberReader lineNumberReader) throws IOException, SqlLoaderControlParserException 
351     {
352         if(logger.isDebugEnabled())
353             logger.debug("collectExpectedNumberOfColumns(expectedNumberOfColumns={}, lineNumberReader={}) - start", 
354                 String.valueOf(expectedNumberOfColumns), lineNumberReader);
355 
356         String anotherLine = lineNumberReader.readLine();
357         if (anotherLine == null) {
358             return null;
359         }
360 
361         List columns = null;
362         int columnsCollectedSoFar = 0;
363         StringBuffer buffer = new StringBuffer();
364         boolean shouldProceed = false;
365         while (columnsCollectedSoFar < expectedNumberOfColumns) {
366             try {
367                 buffer.append(anotherLine);
368                 columns = parse(buffer.toString());
369                 columnsCollectedSoFar = columns.size();
370             }
371             catch (IllegalStateException e) {
372                 resetThePipeline();
373                 anotherLine = lineNumberReader.readLine();
374                 if (anotherLine == null) {
375                     break;
376                 }
377                 buffer.append("\n");
378                 shouldProceed = true;
379             }
380             if (!shouldProceed) {
381                 break;
382             }
383         }
384         
385         if (columnsCollectedSoFar != expectedNumberOfColumns) {
386             if (this.hasTrailingNullCols) {
387                 columns.add(SqlLoaderControlProducer.NULL);
388             }
389             else {
390 
391                 String message =
392                     new StringBuffer("Expected ")
393                         .append(expectedNumberOfColumns).append(" columns on line ")
394                         .append(lineNumberReader.getLineNumber()).append(", got ")
395                         .append(columnsCollectedSoFar).append(". Offending line: ").append(buffer)
396                         .toString();
397                 throw new SqlLoaderControlParserException(message);
398             }
399         }
400         return columns;
401     }
402 
403     /**
404      * Gets the pipeline.
405      * 
406      * @return the pipeline
407      */
408     Pipeline getPipeline() 
409     {
410         return this.pipeline;
411     }
412 
413     /**
414      * Sets the pipeline.
415      * 
416      * @param pipeline the pipeline
417      */
418     void setPipeline(Pipeline pipeline) 
419     {
420         this.pipeline = pipeline;
421     }
422 
423     public String getTableName() 
424     {
425         return this.tableName;
426     }
427 }