org.apache.flink.types.parser.FieldParser#getErrorState

Source File: StringParsers.java From Alink with Apache License 2.0

6 votes

/**
 * Converts a textual token into a field value.
 *
 * <p>String fields are returned as-is. For every other field the token is encoded to bytes and
 * handed to {@code parser}; the parse counts as successful only when the parser reports
 * {@link FieldParser.ParseErrorState#NONE}.
 *
 * @param parser        parser used for non-string fields; not consulted when
 *                      {@code isStringField} is {@code true}
 * @param token         raw text of the field
 * @param isStringField whether the target field is a string, in which case no parsing happens
 * @return a pair {@code (success, value)}; {@code (false, null)} when the token is null or
 *         whitespace-only for a non-string field, or when the parser reports an error
 */
static Tuple2<Boolean, Object> parseField(FieldParser<?> parser, String token, boolean isStringField) {
    if (isStringField) {
        return Tuple2.of(true, token);
    }
    if (StringUtils.isNullOrWhitespaceOnly(token)) {
        return Tuple2.of(false, null);
    }
    // Encode with the charset the parser itself is configured with, rather than the
    // platform default, so that encoding and decoding always agree.
    byte[] bytes = token.getBytes(parser.getCharset());
    // A single NUL byte is passed as the delimiter (presumably because it is not expected
    // to occur inside a token, so the whole token is treated as one field).
    parser.resetErrorStateAndParse(bytes, 0, bytes.length, new byte[]{0}, null);
    if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
        return Tuple2.of(false, null);
    }
    return Tuple2.of(true, parser.getLastResult());
}

Source File: ColumnsWriter.java From Alink with Apache License 2.0

6 votes

/**
 * Converts a textual token into a field value.
 *
 * <p>String fields are returned as-is. For every other field the token is encoded to bytes and
 * handed to {@code parser}; the parse counts as successful only when the parser reports
 * {@link FieldParser.ParseErrorState#NONE}.
 *
 * @param parser        parser used for non-string fields; not consulted when
 *                      {@code isStringField} is {@code true}
 * @param token         raw text of the field
 * @param isStringField whether the target field is a string, in which case no parsing happens
 * @return a pair {@code (success, value)}; {@code (false, null)} when the token is null or
 *         whitespace-only for a non-string field, or when the parser reports an error
 */
static Tuple2 <Boolean, Object> parseField(FieldParser <?> parser, String token, boolean isStringField) {
	if (isStringField) {
		return Tuple2.of(true, token);
	}
	if (StringUtils.isNullOrWhitespaceOnly(token)) {
		return Tuple2.of(false, null);
	}
	// Encode with the charset the parser itself is configured with, rather than the
	// platform default, so that encoding and decoding always agree.
	byte[] bytes = token.getBytes(parser.getCharset());
	// A single NUL byte is passed as the delimiter (presumably because it is not expected
	// to occur inside a token, so the whole token is treated as one field).
	parser.resetErrorStateAndParse(bytes, 0, bytes.length, new byte[] {0}, null);
	if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
		return Tuple2.of(false, null);
	}
	return Tuple2.of(true, parser.getLastResult());
}

Source File: CsvParser.java From Alink with Apache License 2.0

6 votes

/**
 * Converts one CSV token into a field value.
 *
 * <p>For string fields: when quoting is enabled and the token starts with the quote character,
 * the opening quote (and the closing quote, if present) is stripped and every occurrence of
 * {@code escapedQuote} is replaced by {@code quoteString}. Otherwise the token is returned
 * unchanged; this includes null and empty tokens, which cannot carry a quote.
 *
 * <p>For non-string fields: a null or whitespace-only token yields {@code (true, null)};
 * anything else is parsed by {@code parser} using {@code fieldDelim} as the delimiter.
 *
 * @param parser        parser used for non-string fields; not consulted for string fields
 * @param token         raw text of the field
 * @param isStringField whether the target field is a string
 * @return a pair {@code (success, value)}; {@code (false, null)} only when the parser reports
 *         an error state other than {@link FieldParser.ParseErrorState#NONE}
 */
private Tuple2<Boolean, Object> parseField(FieldParser<?> parser, String token, boolean isStringField) {
    if (isStringField) {
        // Null/empty tokens have no first character to inspect; return them untouched
        // instead of failing in charAt(0).
        if (!enableQuote || token == null || token.isEmpty() || token.charAt(0) != quoteChar) {
            return Tuple2.of(true, token);
        }
        // Only treat the last character as a closing quote when it is distinct from the
        // opening one; a token that is just a single quote character has no closing quote.
        boolean hasClosingQuote = token.length() > 1 && token.endsWith(quoteChar.toString());
        int end = hasClosingQuote ? token.length() - 1 : token.length();
        String content = token.substring(1, end);
        return Tuple2.of(true, content.replace(escapedQuote, quoteString));
    }

    if (StringUtils.isNullOrWhitespaceOnly(token)) {
        return Tuple2.of(true, null);
    }
    // Encode token and delimiter with the parser's own charset rather than the platform
    // default, so that encoding and decoding always agree.
    byte[] bytes = token.getBytes(parser.getCharset());
    byte[] delimiter = fieldDelim.getBytes(parser.getCharset());
    parser.resetErrorStateAndParse(bytes, 0, bytes.length, delimiter, null);
    if (parser.getErrorState() != FieldParser.ParseErrorState.NONE) {
        return Tuple2.of(false, null);
    }
    return Tuple2.of(true, parser.getLastResult());
}

Source File: RowCsvInputFormat.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Parses one delimited record from {@code bytes} into {@code holders}.
 *
 * <p>Fields are visited in order. Included fields are parsed with the field parser registered
 * for their output position and the result is stored in {@code holders}; excluded fields are
 * skipped. A field whose parse position is negative, or that is empty while
 * {@code emptyColumnAsNull} is set, is stored as {@code null}.
 *
 * @param holders  target array receiving the parsed values; existing entries are passed to the
 *                 parsers as reuse objects
 * @param bytes    buffer containing the record
 * @param offset   index of the first byte of the record in {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if all fields were processed; {@code false} if the row is too short and
 *         the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field
 *                        fails to parse, or if skipping a field yields a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position: only the very last field may start exactly at the limit
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
			// remember where this field started so it can be skipped if parsing fails
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[fieldPosMap[output]]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					// decode the row with the configured charset, consistent with the other messages
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[fieldPosMap[output]] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[fieldPosMap[output]] = null;
				// restart from where this field began to find the position after it
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}

Source File: RowCsvInputFormat.java From flink with Apache License 2.0

4 votes

/**
 * Parses one delimited record from {@code bytes} into {@code holders}.
 *
 * <p>Fields are visited in order. Included fields are parsed with the field parser registered
 * for their output position and the result is stored in {@code holders}; excluded fields are
 * skipped. A field whose parse position is negative, or that is empty while
 * {@code emptyColumnAsNull} is set, is stored as {@code null}.
 *
 * @param holders  target array receiving the parsed values; existing entries are passed to the
 *                 parsers as reuse objects
 * @param bytes    buffer containing the record
 * @param offset   index of the first byte of the record in {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if all fields were processed; {@code false} if the row is too short and
 *         the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field
 *                        fails to parse, or if skipping a field yields a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position: only the very last field may start exactly at the limit
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
			// remember where this field started so it can be skipped if parsing fails
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[fieldPosMap[output]]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					// decode the row with the configured charset, consistent with the other messages
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[fieldPosMap[output]] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[fieldPosMap[output]] = null;
				// restart from where this field began to find the position after it
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}

Source File: RowCsvInputFormat.java From flink with Apache License 2.0

4 votes

/**
 * Parses one delimited record from {@code bytes} into {@code holders}.
 *
 * <p>Fields are visited in order. Included fields are parsed with the field parser registered
 * for their output position and the result is stored in {@code holders}; excluded fields are
 * skipped. A field whose parse position is negative, or that is empty while
 * {@code emptyColumnAsNull} is set, is stored as {@code null}.
 *
 * @param holders  target array receiving the parsed values; existing entries are passed to the
 *                 parsers as reuse objects
 * @param bytes    buffer containing the record
 * @param offset   index of the first byte of the record in {@code bytes}
 * @param numBytes length of the record in bytes
 * @return {@code true} if all fields were processed; {@code false} if the row is too short and
 *         the format is lenient
 * @throws ParseException if the format is not lenient and the row is too short or a field
 *                        fails to parse, or if skipping a field yields a negative position
 */
@Override
protected boolean parseRecord(Object[] holders, byte[] bytes, int offset, int numBytes) throws ParseException {
	byte[] fieldDelimiter = this.getFieldDelimiter();
	boolean[] fieldIncluded = this.fieldIncluded;

	int startPos = offset;
	int limit = offset + numBytes;

	int field = 0;
	int output = 0;
	while (field < fieldIncluded.length) {

		// check valid start position: only the very last field may start exactly at the limit
		if (startPos > limit || (startPos == limit && field != fieldIncluded.length - 1)) {
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		if (fieldIncluded[field]) {
			// parse field
			FieldParser<Object> parser = (FieldParser<Object>) this.getFieldParsers()[fieldPosMap[output]];
			// remember where this field started so it can be skipped if parsing fails
			int latestValidPos = startPos;
			startPos = parser.resetErrorStateAndParse(
				bytes,
				startPos,
				limit,
				fieldDelimiter,
				holders[fieldPosMap[output]]);

			if (!isLenient() && (parser.getErrorState() != FieldParser.ParseErrorState.NONE)) {
				// the error state EMPTY_COLUMN is ignored
				if (parser.getErrorState() != FieldParser.ParseErrorState.EMPTY_COLUMN) {
					// decode the row with the configured charset, consistent with the other messages
					throw new ParseException(String.format("Parsing error for column %1$s of row '%2$s' originated by %3$s: %4$s.",
						field + 1, new String(bytes, offset, numBytes, getCharset()), parser.getClass().getSimpleName(), parser.getErrorState()));
				}
			}
			holders[fieldPosMap[output]] = parser.getLastResult();

			// check parse result:
			// the result is null if it is invalid
			// or empty with emptyColumnAsNull enabled
			if (startPos < 0 ||
				(emptyColumnAsNull && (parser.getErrorState().equals(FieldParser.ParseErrorState.EMPTY_COLUMN)))) {
				holders[fieldPosMap[output]] = null;
				// restart from where this field began to find the position after it
				startPos = skipFields(bytes, latestValidPos, limit, fieldDelimiter);
			}
			output++;
		} else {
			// skip field
			startPos = skipFields(bytes, startPos, limit, fieldDelimiter);
		}

		// check if something went wrong
		if (startPos < 0) {
			throw new ParseException(String.format("Unexpected parser position for column %1$s of row '%2$s'",
				field + 1, new String(bytes, offset, numBytes, getCharset())));
		}
		else if (startPos == limit
				&& field != fieldIncluded.length - 1
				&& !FieldParser.endsWithDelimiter(bytes, startPos - 1, fieldDelimiter)) {
			// We are at the end of the record, but not all fields have been read
			// and the end is not a field delimiter indicating an empty last field.
			if (isLenient()) {
				return false;
			} else {
				throw new ParseException("Row too short: " + new String(bytes, offset, numBytes, getCharset()));
			}
		}

		field++;
	}
	return true;
}

Java Code Examples for org.apache.flink.types.parser.FieldParser#getErrorState()