org.apache.flink.hadoopcompatibility.HadoopInputs Java Examples
The following examples show how to use org.apache.flink.hadoopcompatibility.HadoopInputs, a utility class whose static factory methods wrap Hadoop InputFormats for use with Flink's DataSet API. Each snippet is taken from an open-source project; the original source file and license are noted above it.
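Before the project examples, here is a minimal self-contained sketch of the three factory methods they rely on: readSequenceFile, readHadoopFile, and createHadoopInput. The HDFS path and the LongWritable/Text key/value types are illustrative assumptions, not taken from any project below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.HadoopInputs;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;

public class HadoopInputsSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // (1) readSequenceFile: read a SequenceFile at a path, key/value classes given explicitly.
        DataSet<Tuple2<LongWritable, Text>> a = env.createInput(
                HadoopInputs.readSequenceFile(LongWritable.class, Text.class, "hdfs:///tmp/input"));

        // (2) readHadoopFile: wrap a Hadoop mapreduce FileInputFormat plus a Job carrying its configuration.
        Job job = Job.getInstance();
        DataSet<Tuple2<LongWritable, Text>> b = env.createInput(
                HadoopInputs.readHadoopFile(new SequenceFileInputFormat<LongWritable, Text>(),
                        LongWritable.class, Text.class, "hdfs:///tmp/input", job));

        // (3) createHadoopInput: like (2), but the input paths must already be set on the Job.
        Job job2 = Job.getInstance();
        FileInputFormat.setInputPaths(job2, "hdfs:///tmp/input");
        DataSet<Tuple2<LongWritable, Text>> c = env.createInput(
                HadoopInputs.createHadoopInput(new SequenceFileInputFormat<LongWritable, Text>(),
                        LongWritable.class, Text.class, job2));

        a.union(b).union(c).first(10).print();
    }
}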
Example #1
Source File: FlinkUtil.java from kylin-on-parquet-v2 (Apache License 2.0)
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    // Collect non-hidden sub-directories (names starting with "_" are Hadoop metadata by convention).
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }
    // Flat directory of sequence files: read it directly.
    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }
    // Otherwise register every sub-directory as an input path on a fresh Job.
    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
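The method reads a flat directory of sequence files directly via readSequenceFile; when sub-directories are present, it instead registers every non-hidden sub-directory on a Job and goes through createHadoopInput. A hypothetical call site (the HDFS path and the Text key/value classes are placeholders, not from Kylin):

// Hypothetical caller; the path and Writable classes are placeholders.
Configuration conf = new Configuration();                        // org.apache.hadoop.conf.Configuration
FileSystem fs = FileSystem.get(conf);                            // org.apache.hadoop.fs.FileSystem
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet cuboids = FlinkUtil.parseInputPath("hdfs:///kylin/cuboid_output", fs, env, Text.class, Text.class);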
Example #2
Source File: FlinkUtil.java from kylin-on-parquet-v2 (Apache License 2.0)
public static DataSet<String[]> readHiveRecords(boolean isSequenceFile, ExecutionEnvironment env, String inputPath,
        String hiveTable, Job job) throws IOException {
    DataSet<String[]> recordDataSet;
    if (isSequenceFile) {
        recordDataSet = env
                .createInput(
                        HadoopInputs.readHadoopFile(new SequenceFileInputFormat(), BytesWritable.class, Text.class,
                                inputPath, job),
                        TypeInformation.of(new TypeHint<Tuple2<BytesWritable, Text>>() {}))
                .map(new MapFunction<Tuple2<BytesWritable, Text>, String[]>() {
                    @Override
                    public String[] map(Tuple2<BytesWritable, Text> tuple2) throws Exception {
                        // Each record is a delimited line stored in the Text value; the key is unused.
                        String s = Bytes.toString(tuple2.f1.getBytes(), 0, tuple2.f1.getLength());
                        return s.split(BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER);
                    }
                });
    } else {
        throw new UnsupportedOperationException("Currently, Flink does not support reading Hive tables directly.");
    }
    return recordDataSet;
}
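Only the SequenceFile branch is implemented: the Hive flat table is expected to be materialized as sequence files, each Text value holding one delimited record, since Flink cannot read the Hive table directly here (the hiveTable parameter goes unused in this branch). A hypothetical invocation, with placeholder path and table name:

// Hypothetical invocation; the path and table name are placeholders.
Job job = Job.getInstance();
DataSet<String[]> records = FlinkUtil.readHiveRecords(true, env,
        "hdfs:///warehouse/kylin_intermediate_table", "default.kylin_intermediate_table", job);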
Example #3 and Example #4
Source File: FlinkUtil.java from kylin (Apache License 2.0)
These two snippets are byte-for-byte identical to Examples #1 and #2 above: the kylin project carries the same parseInputPath and readHiveRecords methods as kylin-on-parquet-v2.