Java Code Examples for org.apache.flink.core.fs.FileSystem#WriteMode
The following examples show how to use org.apache.flink.core.fs.FileSystem#WriteMode.
Each example notes its source file, originating project, and license above the code.
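Before the project examples, here is a minimal sketch (not taken from any of the projects below) of how the enum is typically consumed: WriteMode has two values, NO_OVERWRITE and OVERWRITE, and is passed to file-writing APIs such as DataSet#writeAsText. The output path and job name are placeholders.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.core.fs.FileSystem;

public class WriteModeSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> lines = env.fromElements("a", "b", "c");

        // OVERWRITE replaces an existing file or directory at the target path;
        // NO_OVERWRITE fails the job if the target already exists.
        lines.writeAsText("/tmp/write-mode-sketch.txt", FileSystem.WriteMode.OVERWRITE);

        env.execute("write-mode-sketch");
    }
}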
Example 1
Source File: CsvSinkBatchOp.java From Alink with Apache License 2.0
@Override
public CsvSinkBatchOp sinkFrom(BatchOperator in) {
    final String filePath = getFilePath();
    final String fieldDelim = getFieldDelimiter();
    final int numFiles = getNumFiles();
    final TypeInformation[] types = in.getColTypes();
    final Character quoteChar = getQuoteChar();

    FileSystem.WriteMode mode = FileSystem.WriteMode.NO_OVERWRITE;
    if (getOverwriteSink()) {
        mode = FileSystem.WriteMode.OVERWRITE;
    }

    DataSet<String> textLines = ((DataSet<Row>) in.getDataSet())
        .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar))
        .map(new MapFunction<Row, String>() {
            @Override
            public String map(Row value) throws Exception {
                return (String) value.getField(0);
            }
        });

    textLines.writeAsText(filePath, mode).name("csv_sink").setParallelism(numFiles);
    return this;
}
Example 2
Source File: CsvSinkStreamOp.java From Alink with Apache License 2.0
@Override
public CsvSinkStreamOp sinkFrom(StreamOperator in) {
    this.schema = in.getSchema();

    final String filePath = getFilePath();
    final String fieldDelim = getFieldDelimiter();
    final String rowDelimiter = getRowDelimiter();
    final int numFiles = getNumFiles();
    final TypeInformation[] types = in.getColTypes();
    final Character quoteChar = getQuoteChar();

    FileSystem.WriteMode writeMode;
    if (getOverwriteSink()) {
        writeMode = FileSystem.WriteMode.OVERWRITE;
    } else {
        writeMode = FileSystem.WriteMode.NO_OVERWRITE;
    }

    DataStream<Row> output = ((DataStream<Row>) in.getDataStream())
        .map(new CsvUtil.FormatCsvFunc(types, fieldDelim, quoteChar))
        .setParallelism(numFiles);

    CsvTableSink cts = new CsvTableSink(filePath, rowDelimiter, numFiles, writeMode);
    cts.emitDataStream(output);
    return this;
}
Example 3
Source File: CsvTableSink.java From flink with Apache License 2.0
/**
 * A simple {@link TableSink} to emit data as CSV files.
 *
 * @param path The output path to write the Table to.
 * @param fieldDelim The field delimiter
 * @param numFiles The number of files to write to
 * @param writeMode The write mode to specify whether existing files are overwritten or not.
 */
public CsvTableSink(
        String path,
        String fieldDelim,
        int numFiles,
        FileSystem.WriteMode writeMode) {
    this.path = path;
    this.fieldDelim = fieldDelim;
    this.numFiles = numFiles;
    this.writeMode = writeMode;
}
Example 4
Source File: RheemFileOutputFormat.java From rheem with Apache License 2.0
/**
 * Initialization of the distributed file system if it is used.
 *
 * @param parallelism The task parallelism.
 */
@Override
public void initializeGlobal(int parallelism) throws IOException {
    try {
        final Path path = getOutputFilePath();
        final FileSystem fs = path.getFileSystem();

        // only distributed file systems can be initialized at start-up time.
        if (fs.isDistributedFS()) {
            final FileSystem.WriteMode writeMode = getWriteMode();
            final FileOutputFormat.OutputDirectoryMode outDirMode = getOutputDirectoryMode();

            if (parallelism == 1 && outDirMode == FileOutputFormat.OutputDirectoryMode.PARONLY) {
                // output is not written in parallel and should be written to a single file.
                // prepare distributed output path
                if (!fs.initOutPathDistFS(path, writeMode, false)) {
                    // output preparation failed! Cancel task.
                    throw new IOException("Output path could not be initialized.");
                }
            } else {
                // output should be written to a directory
                // only distributed file systems can be initialized at start-up time.
                if (!fs.initOutPathDistFS(path, writeMode, true)) {
                    throw new IOException("Output directory could not be created.");
                }
            }
        }
    } catch (Exception e) {
        throw new RheemException(e);
    }
}
Example 5
Source File: CsvTableSink.java From flink with Apache License 2.0
/**
 * A simple {@link TableSink} to emit data as CSV files.
 *
 * @param path The output path to write the Table to.
 * @param fieldDelim The field delimiter
 * @param numFiles The number of files to write to
 * @param writeMode The write mode to specify whether existing files are overwritten or not.
 * @param fieldNames The field names of the table to emit.
 * @param fieldTypes The field types of the table to emit.
 */
public CsvTableSink(
        String path,
        String fieldDelim,
        int numFiles,
        FileSystem.WriteMode writeMode,
        String[] fieldNames,
        DataType[] fieldTypes) {
    this.path = path;
    this.fieldDelim = fieldDelim;
    this.numFiles = numFiles;
    this.writeMode = writeMode;
    this.fieldNames = fieldNames;
    this.fieldTypes = fieldTypes;
}
Example 6
Source File: CsvTableSink.java From flink with Apache License 2.0
/**
 * A simple {@link TableSink} to emit data as CSV files.
 *
 * @param path The output path to write the Table to.
 * @param fieldDelim The field delimiter
 * @param numFiles The number of files to write to
 * @param writeMode The write mode to specify whether existing files are overwritten or not.
 */
public CsvTableSink(
        String path,
        String fieldDelim,
        int numFiles,
        FileSystem.WriteMode writeMode) {
    this.path = path;
    this.fieldDelim = fieldDelim;
    this.numFiles = numFiles;
    this.writeMode = writeMode;
}
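As a usage note for the constructors in Examples 3, 5, and 6, a hypothetical construction call might look like the sketch below; the output path and delimiter are placeholders, and registering the sink with a TableEnvironment is omitted because that API differs across Flink versions.

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.table.sinks.CsvTableSink;

public class CsvTableSinkSketch {
    public static void main(String[] args) {
        // Hypothetical use of the four-argument constructor shown above:
        // a single output file, overwritten if it already exists.
        CsvTableSink sink = new CsvTableSink(
            "/tmp/result.csv",                // output path (placeholder)
            ",",                              // field delimiter
            1,                                // number of files
            FileSystem.WriteMode.OVERWRITE);  // replace any existing file
    }
}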
Example 7
Source File: RheemFileOutputFormat.java From rheem with Apache License 2.0
public void setWriteMode(FileSystem.WriteMode mode) {
    if (mode == null) {
        throw new NullPointerException();
    }
    this.writeMode = mode;
}
Example 8
Source File: RheemFileOutputFormat.java From rheem with Apache License 2.0
public FileSystem.WriteMode getWriteMode() {
    return this.writeMode;
}
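Examples 7 and 8 mirror the accessor pair on Flink's own FileOutputFormat. Assuming Flink's TextOutputFormat (which extends FileOutputFormat) exposes the same setWriteMode/getWriteMode pair, a hedged usage sketch could look like this; the path is a placeholder.

import org.apache.flink.api.java.io.TextOutputFormat;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class OutputFormatWriteModeSketch {
    public static void main(String[] args) {
        // Configure the write mode on an output format before it is used as a sink.
        TextOutputFormat<String> format = new TextOutputFormat<>(new Path("/tmp/out"));
        format.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        System.out.println(format.getWriteMode()); // expected to print OVERWRITE
    }
}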
Example 9
Source File: CsvTableSinkFactoryBase.java From flink with Apache License 2.0
protected CsvTableSink createTableSink(
        Boolean isStreaming,
        Map<String, String> properties) {

    DescriptorProperties params = new DescriptorProperties();
    params.putProperties(properties);

    // validate
    new FileSystemValidator().validate(params);
    new OldCsvValidator().validate(params);
    new SchemaValidator(isStreaming, false, false).validate(params);

    // build
    TableSchema tableSchema = TableSchemaUtils.getPhysicalSchema(params.getTableSchema(SCHEMA));

    // if a schema is defined, no matter derive schema is set or not, will use the defined schema
    final boolean hasSchema = params.hasPrefix(FORMAT_FIELDS);
    if (hasSchema) {
        TableSchema formatSchema = params.getTableSchema(FORMAT_FIELDS);
        if (!getFieldLogicalTypes(formatSchema).equals(getFieldLogicalTypes(tableSchema))) {
            throw new TableException(String.format(
                "Encodings that differ from the schema are not supported yet for"
                    + " CsvTableSink, format schema is '%s', but table schema is '%s'.",
                formatSchema,
                tableSchema));
        }
    }

    String path = params.getString(CONNECTOR_PATH);
    String fieldDelimiter = params.getOptionalString(FORMAT_FIELD_DELIMITER).orElse(",");
    Optional<String> writeModeParm = params.getOptionalString(FORMAT_WRITE_MODE);
    FileSystem.WriteMode writeMode = (writeModeParm.isPresent())
        ? FileSystem.WriteMode.valueOf(writeModeParm.get())
        : null;
    int numFiles = params.getOptionalInt(FORMAT_NUM_FILES).orElse(-1);

    // bridge to java.sql.Timestamp/Time/Date
    DataType[] dataTypes = Arrays.stream(tableSchema.getFieldDataTypes())
        .map(dt -> {
            switch (dt.getLogicalType().getTypeRoot()) {
                case TIMESTAMP_WITHOUT_TIME_ZONE:
                    return dt.bridgedTo(Timestamp.class);
                case TIME_WITHOUT_TIME_ZONE:
                    return dt.bridgedTo(Time.class);
                case DATE:
                    return dt.bridgedTo(Date.class);
                default:
                    return dt;
            }
        })
        .toArray(DataType[]::new);

    return new CsvTableSink(
        path,
        fieldDelimiter,
        numFiles,
        writeMode,
        tableSchema.getFieldNames(),
        dataTypes);
}