org.apache.flink.api.common.io.ParseException Java Examples
The following examples show how to use
org.apache.flink.api.common.io.ParseException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RowCsvInputFormat.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Override protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException { byte[] fieldDelimiter = this.getFieldDelimiter(); boolean[] fieldIncluded = this.fieldIncluded; int startPos = offset; int limit = offset + numBytes; int field = 0; int output = 0; while (field < fieldIncluded.length) { // check valid start position if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) { if (isLenient()) { return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset())); } } if (fieldIncluded[field]) { // parse field FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]]; int latestValidPos = startPos; startPos = parser.resetErrorStateAndParse( bytes, startPos, limit, fieldDelimiter, holders[fieldPosMap[output]]); if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) { // the error state EMPTY_COLUMN is ignored if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) { throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.", field + 1, new String(bytes, offset, numBytes), parser.getClass().getSimpleName(), parser.getErrorState())); } } holders[fieldPosMap[output]] = parser.getLastResult(); // check parse result: // the result is null if it is invalid // or empty with emptyColumnAsNull enabled if (startPos < 0 || (emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) { holders[fieldPosMap[output]] = null; startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter); } output++; } else { // skip field startPos = skipFields(bytes, startPos, limit, fieldDelimiter); } // check if something went wrong if (startPos < 0) { throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'", field + 1, new String(bytes, offset, numBytes))); } 
else if (startPos == limit && field != fieldIncluded.length - 1 && !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) { // We are at the end of the record, but not all fields have been read // and the end is not a field delimiter indicating an empty last field. if (isLenient()) { return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes)); } } field++; } return true; }
Example #2
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testTailingEmptyFields() throws Exception { final String fileContent = "aa,bb,cc\n" + // ok "aa,bb,\n" + // the last field is empty "aa,,\n" + // the last two fields are empty ",,\n" + // all fields are empty "aa,bb"; // row too short final FileInputSplit split = createTempFile(fileContent); final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class); final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, typeInfo); format.setFieldDelimiter(","); format.configure(new Configuration()); format.open(split); Tuple3<String, String, String> result = new Tuple3<String, String, String>(); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("cc", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); try { format.nextRecord(result); fail("Parse Exception was not thrown! (Row too short)"); } catch (ParseException e) {} }
Example #3
Source File: RowCsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Reads rows delimited by the two-character field delimiter {@code "|-"} with a growing number
 * of trailing empty fields and checks that each is returned as an empty string, then verifies
 * that a row with too few fields makes {@code nextRecord} throw a {@link ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
    String fileContent =
        "abc|-def|-ghijk\n" +
        "abc|-def|-\n" +
        "abc|-|-\n" +
        "|-|-|-\n" +
        "|-|-\n" +
        "abc|-def\n";

    // Expected values for the five well-formed rows, in file order.
    String[][] expectedRows = {
        {"abc", "def", "ghijk"},
        {"abc", "def", ""},
        {"abc", "", ""},
        {"", "", ""},
        {"", "", ""}
    };

    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes = new TypeInformation<?>[]{
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO};

    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    // Replaces the "|" delimiter passed to the constructor.
    format.setFieldDelimiter("|-");
    format.configure(new Configuration());
    format.open(split);

    try {
        Row result = new Row(3);

        for (String[] expected : expectedRows) {
            result = format.nextRecord(result);
            assertNotNull(result);
            assertEquals(expected[0], result.getField(0));
            assertEquals(expected[1], result.getField(1));
            assertEquals(expected[2], result.getField(2));
        }

        try {
            format.nextRecord(result);
            fail("Parse Exception was not thrown! (Row too short)");
        } catch (ParseException ignored) {
            // Expected: the final row has only two of the three fields.
        }
    } finally {
        // Release the underlying stream even when an assertion fails.
        format.close();
    }
}
Example #4
Source File: RowCsvInputFormat.java From flink with Apache License 2.0 | 4 votes |
@Override protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException { byte[] fieldDelimiter = this.getFieldDelimiter(); boolean[] fieldIncluded = this.fieldIncluded; int startPos = offset; int limit = offset + numBytes; int field = 0; int output = 0; while (field < fieldIncluded.length) { // check valid start position if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) { if (isLenient()) { return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset())); } } if (fieldIncluded[field]) { // parse field FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]]; int latestValidPos = startPos; startPos = parser.resetErrorStateAndParse( bytes, startPos, limit, fieldDelimiter, holders[fieldPosMap[output]]); if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) { // the error state EMPTY_COLUMN is ignored if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) { throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.", field + 1, new String(bytes, offset, numBytes), parser.getClass().getSimpleName(), parser.getErrorState())); } } holders[fieldPosMap[output]] = parser.getLastResult(); // check parse result: // the result is null if it is invalid // or empty with emptyColumnAsNull enabled if (startPos < 0 || (emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) { holders[fieldPosMap[output]] = null; startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter); } output++; } else { // skip field startPos = skipFields(bytes, startPos, limit, fieldDelimiter); } // check if something went wrong if (startPos < 0) { throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'", field + 1, new String(bytes, offset, numBytes))); } 
else if (startPos == limit && field != fieldIncluded.length - 1 && !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) { // We are at the end of the record, but not all fields have been read // and the end is not a field delimiter indicating an empty last field. if (isLenient()) { return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes)); } } field++; } return true; }
Example #5
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testTailingEmptyFields() throws Exception { final String fileContent = "aa,bb,cc\n" + // ok "aa,bb,\n" + // the last field is empty "aa,,\n" + // the last two fields are empty ",,\n" + // all fields are empty "aa,bb"; // row too short final FileInputSplit split = createTempFile(fileContent); final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class); final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, typeInfo); format.setFieldDelimiter(","); format.configure(new Configuration()); format.open(split); Tuple3<String, String, String> result = new Tuple3<String, String, String>(); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("cc", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); try { format.nextRecord(result); fail("Parse Exception was not thrown! (Row too short)"); } catch (ParseException e) {} }
Example #6
Source File: RowCsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Reads rows delimited by the two-character field delimiter {@code "|-"} with a growing number
 * of trailing empty fields and checks that each is returned as an empty string, then verifies
 * that a row with too few fields makes {@code nextRecord} throw a {@link ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
    String fileContent =
        "abc|-def|-ghijk\n" +
        "abc|-def|-\n" +
        "abc|-|-\n" +
        "|-|-|-\n" +
        "|-|-\n" +
        "abc|-def\n";

    // Expected values for the five well-formed rows, in file order.
    String[][] expectedRows = {
        {"abc", "def", "ghijk"},
        {"abc", "def", ""},
        {"abc", "", ""},
        {"", "", ""},
        {"", "", ""}
    };

    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes = new TypeInformation<?>[]{
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO};

    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    // Replaces the "|" delimiter passed to the constructor.
    format.setFieldDelimiter("|-");
    format.configure(new Configuration());
    format.open(split);

    try {
        Row result = new Row(3);

        for (String[] expected : expectedRows) {
            result = format.nextRecord(result);
            assertNotNull(result);
            assertEquals(expected[0], result.getField(0));
            assertEquals(expected[1], result.getField(1));
            assertEquals(expected[2], result.getField(2));
        }

        try {
            format.nextRecord(result);
            fail("Parse Exception was not thrown! (Row too short)");
        } catch (ParseException ignored) {
            // Expected: the final row has only two of the three fields.
        }
    } finally {
        // Release the underlying stream even when an assertion fails.
        format.close();
    }
}
Example #7
Source File: RowCsvInputFormat.java From flink with Apache License 2.0 | 4 votes |
@Override protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException { byte[] fieldDelimiter = this.getFieldDelimiter(); boolean[] fieldIncluded = this.fieldIncluded; int startPos = offset; int limit = offset + numBytes; int field = 0; int output = 0; while (field < fieldIncluded.length) { // check valid start position if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) { if (isLenient()) { return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset())); } } if (fieldIncluded[field]) { // parse field FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]]; int latestValidPos = startPos; startPos = parser.resetErrorStateAndParse( bytes, startPos, limit, fieldDelimiter, holders[fieldPosMap[output]]); if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) { // the error state EMPTY_COLUMN is ignored if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) { throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.", field + 1, new String(bytes, offset, numBytes), parser.getClass().getSimpleName(), parser.getErrorState())); } } holders[fieldPosMap[output]] = parser.getLastResult(); // check parse result: // the result is null if it is invalid // or empty with emptyColumnAsNull enabled if (startPos < 0 || (emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) { holders[fieldPosMap[output]] = null; startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter); } output++; } else { // skip field startPos = skipFields(bytes, startPos, limit, fieldDelimiter); } // check if something went wrong if (startPos < 0) { throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'", field + 1, new String(bytes, offset, numBytes))); } 
else if (startPos == limit && field != fieldIncluded.length - 1 && !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) { // We are at the end of the record, but not all fields have been read // and the end is not a field delimiter indicating an empty last field. if (isLenient()) { return false; } else { throw new ParseException("Row too short: " + new String(bytes, offset, numBytes)); } } field++; } return true; }
Example #8
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testTailingEmptyFields() throws Exception { final String fileContent = "aa,bb,cc\n" + // ok "aa,bb,\n" + // the last field is empty "aa,,\n" + // the last two fields are empty ",,\n" + // all fields are empty "aa,bb"; // row too short final FileInputSplit split = createTempFile(fileContent); final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class); final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, typeInfo); format.setFieldDelimiter(","); format.configure(new Configuration()); format.open(split); Tuple3<String, String, String> result = new Tuple3<String, String, String>(); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("cc", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); try { format.nextRecord(result); fail("Parse Exception was not thrown! (Row too short)"); } catch (ParseException e) {} }
Example #9
Source File: RowCsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Reads rows delimited by the two-character field delimiter {@code "|-"} with a growing number
 * of trailing empty fields and checks that each is returned as an empty string, then verifies
 * that a row with too few fields makes {@code nextRecord} throw a {@link ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
    String fileContent =
        "abc|-def|-ghijk\n" +
        "abc|-def|-\n" +
        "abc|-|-\n" +
        "|-|-|-\n" +
        "|-|-\n" +
        "abc|-def\n";

    // Expected values for the five well-formed rows, in file order.
    String[][] expectedRows = {
        {"abc", "def", "ghijk"},
        {"abc", "def", ""},
        {"abc", "", ""},
        {"", "", ""},
        {"", "", ""}
    };

    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes = new TypeInformation<?>[]{
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO};

    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    // Replaces the "|" delimiter passed to the constructor.
    format.setFieldDelimiter("|-");
    format.configure(new Configuration());
    format.open(split);

    try {
        Row result = new Row(3);

        for (String[] expected : expectedRows) {
            result = format.nextRecord(result);
            assertNotNull(result);
            assertEquals(expected[0], result.getField(0));
            assertEquals(expected[1], result.getField(1));
            assertEquals(expected[2], result.getField(2));
        }

        try {
            format.nextRecord(result);
            fail("Parse Exception was not thrown! (Row too short)");
        } catch (ParseException ignored) {
            // Expected: the final row has only two of the three fields.
        }
    } finally {
        // Release the underlying stream even when an assertion fails.
        format.close();
    }
}