1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.dbunit.dataset.sqlloader;
22
23 import java.io.File;
24 import java.io.FileInputStream;
25 import java.io.IOException;
26 import java.io.InputStreamReader;
27 import java.io.LineNumberReader;
28 import java.net.URL;
29 import java.nio.MappedByteBuffer;
30 import java.nio.channels.FileChannel;
31 import java.text.CharacterIterator;
32 import java.text.StringCharacterIterator;
33 import java.util.ArrayList;
34 import java.util.List;
35 import java.util.StringTokenizer;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39 import org.dbunit.dataset.common.handlers.EscapeHandler;
40 import org.dbunit.dataset.common.handlers.IllegalInputCharacterException;
41 import org.dbunit.dataset.common.handlers.IsAlnumHandler;
42 import org.dbunit.dataset.common.handlers.Pipeline;
43 import org.dbunit.dataset.common.handlers.PipelineException;
44 import org.dbunit.dataset.common.handlers.QuoteHandler;
45 import org.dbunit.dataset.common.handlers.SeparatorHandler;
46 import org.dbunit.dataset.common.handlers.TransparentHandler;
47 import org.dbunit.dataset.common.handlers.WhitespacesHandler;
48 import org.slf4j.Logger;
49 import org.slf4j.LoggerFactory;
50
51
52
53
54
55
56
57
58
59 public class SqlLoaderControlParserImpl implements SqlLoaderControlParser {
60
61 public static final char SEPARATOR_CHAR = ';';
62
63
64 private Pipeline pipeline;
65
66 private String tableName;
67
68
69
70
71
72 private boolean hasTrailingNullCols;
73
74
75
76
77 private static final Logger logger = LoggerFactory.getLogger(SqlLoaderControlParserImpl.class);
78
79
80
81
82
83 public SqlLoaderControlParserImpl() {
84
85 resetThePipeline();
86
87 }
88
89
90
91
92 private void resetThePipeline() {
93 logger.debug("resetThePipeline() - start");
94
95 this.pipeline = new Pipeline();
96 this.pipeline.getPipelineConfig().setSeparatorChar(SEPARATOR_CHAR);
97
98
99 getPipeline().putFront(SeparatorHandler.ENDPIECE());
100 getPipeline().putFront(EscapeHandler.ACCEPT());
101 getPipeline().putFront(IsAlnumHandler.QUOTE());
102 getPipeline().putFront(QuoteHandler.QUOTE());
103 getPipeline().putFront(EscapeHandler.ESCAPE());
104 getPipeline().putFront(WhitespacesHandler.IGNORE());
105 getPipeline().putFront(TransparentHandler.IGNORE());
106
107 }
108
109
110
111
112
113
114
115
116
117
118
119
120
121 public List parse(String csv) throws PipelineException, IllegalInputCharacterException {
122 logger.debug("parse(csv={}) - start", csv);
123
124 getPipeline().resetProducts();
125 CharacterIterator iterator = new StringCharacterIterator(csv);
126 for (char c = iterator.first(); c != CharacterIterator.DONE; c = iterator.next()) {
127 getPipeline().handle(c);
128 }
129 getPipeline().noMoreInput();
130 getPipeline().thePieceIsDone();
131
132 return getPipeline().getProducts();
133 }
134
135
136
137
138
139
140
141
142
143
144
145
146
147 public List parse(URL url) throws IOException, SqlLoaderControlParserException {
148 logger.debug("parse(url={}) - start", url);
149 return parse(new File(url.toString()));
150 }
151
152
153
154
155
156
157
158
159
160
161
162
163 public List parse(File controlFile)
164 throws IOException, SqlLoaderControlParserException
165 {
166 logger.debug("parse(controlFile={}) - start", controlFile);
167
168 FileInputStream fis = new FileInputStream(controlFile);
169
170 FileChannel fc = fis.getChannel();
171
172 MappedByteBuffer mbf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
173 byte[] barray = new byte[(int) (fc.size())];
174 mbf.get(barray);
175
176 String lines = new String(barray);
177
178 lines = lines.replaceAll("\r", "");
179
180 if (parseForRegexp(lines, "(LOAD\\sDATA).*") != null) {
181
182 String fileName = parseForRegexp(lines, ".*INFILE\\s'(.*?)'.*");
183 File dataFile = resolveFile(controlFile.getParentFile(), fileName);
184
185 this.tableName = parseForRegexp(lines, ".*INTO\\sTABLE\\s(.*?)\\s.*");
186
187
188
189
190
191 if (parseForRegexp(lines, ".*(TRAILING NULLCOLS).*") != "") {
192 this.hasTrailingNullCols = true;
193 }
194 else {
195 this.hasTrailingNullCols = false;
196 }
197
198 List rows = new ArrayList();
199 List columnList = parseColumns(lines, rows);
200
201 LineNumberReader lineNumberReader =
202 new LineNumberReader(new InputStreamReader(new FileInputStream(dataFile)));
203 try {
204 parseTheData(columnList, lineNumberReader, rows);
205 }
206 finally {
207 lineNumberReader.close();
208 }
209
210 return rows;
211 }
212 else {
213 throw new SqlLoaderControlParserException("Control file "
214 + controlFile + " not starting using 'LOAD DATA'");
215 }
216 }
217
218 private File resolveFile(File parentDir, String fileName) {
219
220 File dataFile = new File(fileName);
221
222
223 if(!dataFile.isAbsolute()) {
224 fileName = fileName.replaceAll("\\\\", "/");
225
226 if(fileName.startsWith("./")){
227 fileName = fileName.substring(2);
228 }
229
230 if(fileName.startsWith(".")){
231 fileName = fileName.substring(1);
232 }
233 dataFile = new File(parentDir, fileName);
234 }
235 return dataFile;
236 }
237
238 protected String parseForRegexp(String controlFileContent, String regexp)
239 throws IOException
240 {
241 logger.debug("parseForRegexp(controlFileContent={}, regexp={}) - start", controlFileContent, regexp);
242
243 if (controlFileContent == null) {
244 throw new NullPointerException("control file has no content");
245 }
246
247 final Pattern pattern = Pattern.compile(regexp, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
248 final Matcher matches = pattern.matcher(controlFileContent);
249
250 if (matches.find()) {
251 String inFileLine = matches.group(1);
252
253 return inFileLine;
254 }
255 else {
256 return null;
257 }
258 }
259
260
261
262
263
264
265
266
267
268
269
270
271
272 private List parseColumns(String controlFileContent, List rows) throws IOException,
273 SqlLoaderControlParserException
274 {
275 logger.debug("parseColumns(controlFileContent={}, rows={}) - start", controlFileContent, rows);
276
277 List columnList;
278
279 final Pattern pattern =
280 Pattern
281 .compile(".*FIELDS\\s.*\\(\\n(.*?)\\n\\)", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
282 final Matcher matches = pattern.matcher(controlFileContent);
283
284 if (matches.find()) {
285 String columnFragment = matches.group(1);
286
287
288 columnList = new ArrayList();
289
290 columnFragment = columnFragment.replaceAll("\".*?\"", "");
291 columnFragment = columnFragment.replaceAll("\n", "");
292
293 StringTokenizer tok = new StringTokenizer(columnFragment, ",");
294
295 while (tok.hasMoreElements()) {
296
297 String col = (String) tok.nextElement();
298 col = parseForRegexp(col, ".*^([a-zA-Z0-9_]*)\\s").trim();
299 columnList.add(col);
300 }
301
302
303 rows.add(columnList);
304 }
305
306 else {
307 columnList = null;
308 }
309
310 return columnList;
311 }
312
313
314
315
316
317
318
319
320
321
322
323 private void parseTheData(final List columnList, LineNumberReader lineNumberReader, List rows)
324 throws IOException, SqlLoaderControlParserException
325 {
326 if(logger.isDebugEnabled())
327 logger.debug("parseTheData(columnList={}, lineNumberReader={}, rows={}) - start",
328 new Object[] {columnList, lineNumberReader, rows} );
329
330 int nColumns = columnList.size();
331 List columns;
332 while ((columns = collectExpectedNumberOfColumns(nColumns, lineNumberReader)) != null) {
333 rows.add(columns);
334 }
335 }
336
337
338
339
340
341
342
343
344
345
346
347
348 private List collectExpectedNumberOfColumns(
349 int expectedNumberOfColumns,
350 LineNumberReader lineNumberReader) throws IOException, SqlLoaderControlParserException
351 {
352 if(logger.isDebugEnabled())
353 logger.debug("collectExpectedNumberOfColumns(expectedNumberOfColumns={}, lineNumberReader={}) - start",
354 String.valueOf(expectedNumberOfColumns), lineNumberReader);
355
356 String anotherLine = lineNumberReader.readLine();
357 if (anotherLine == null) {
358 return null;
359 }
360
361 List columns = null;
362 int columnsCollectedSoFar = 0;
363 final StringBuilder buffer = new StringBuilder();
364 boolean shouldProceed = false;
365 while (columnsCollectedSoFar < expectedNumberOfColumns) {
366 try {
367 buffer.append(anotherLine);
368 columns = parse(buffer.toString());
369 columnsCollectedSoFar = columns.size();
370 }
371 catch (IllegalStateException e) {
372 resetThePipeline();
373 anotherLine = lineNumberReader.readLine();
374 if (anotherLine == null) {
375 break;
376 }
377 buffer.append("\n");
378 shouldProceed = true;
379 }
380 if (!shouldProceed) {
381 break;
382 }
383 }
384
385 if (columnsCollectedSoFar != expectedNumberOfColumns) {
386 if (this.hasTrailingNullCols) {
387 columns.add(SqlLoaderControlProducer.NULL);
388 }
389 else {
390
391 String message =
392 new StringBuilder("Expected ")
393 .append(expectedNumberOfColumns).append(" columns on line ")
394 .append(lineNumberReader.getLineNumber()).append(", got ")
395 .append(columnsCollectedSoFar).append(". Offending line: ").append(buffer)
396 .toString();
397 throw new SqlLoaderControlParserException(message);
398 }
399 }
400 return columns;
401 }
402
403
404
405
406
407
408 Pipeline getPipeline()
409 {
410 return this.pipeline;
411 }
412
413
414
415
416
417
418 void setPipeline(Pipeline pipeline)
419 {
420 this.pipeline = pipeline;
421 }
422
423 public String getTableName()
424 {
425 return this.tableName;
426 }
427 }