org.apache.hadoop.mapred.KeyValueTextInputFormat Java Examples
The following examples show how to use
org.apache.hadoop.mapred.KeyValueTextInputFormat.
You can vote up the examples you find useful or vote down those you don't,
and navigate to the original project or source file via the links above each example. Related API usage is listed in the sidebar.
Example #1
Source File: TestMultipleInputs.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Verifies that addInputPath(conf, path, format, mapper) records both the
 * input-format and the mapper class for each path, retrievable afterwards
 * through getInputFormatMap and getMapperTypeMap.
 */
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class,
      MapClass2.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> mappers =
      MultipleInputs.getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
  assertEquals(MapClass.class, mappers.get(fooPath));
  assertEquals(MapClass2.class, mappers.get(barPath));
}
Example #2
Source File: TestMultipleInputs.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Verifies that addInputPath(conf, path, format, mapper) records both the
 * input-format and the mapper class for each path, retrievable afterwards
 * through getInputFormatMap and getMapperTypeMap.
 */
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class,
      MapClass2.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> mappers =
      MultipleInputs.getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
  assertEquals(MapClass.class, mappers.get(fooPath));
  assertEquals(MapClass2.class, mappers.get(barPath));
}
Example #3
Source File: TestMultipleInputs.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Verifies that addInputPath(conf, path, format, mapper) records both the
 * input-format and the mapper class for each path, retrievable afterwards
 * through getInputFormatMap and getMapperTypeMap.
 */
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class,
      MapClass2.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> mappers =
      MultipleInputs.getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
  assertEquals(MapClass.class, mappers.get(fooPath));
  assertEquals(MapClass2.class, mappers.get(barPath));
}
Example #4
Source File: TestMultipleInputs.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Verifies that addInputPath(conf, path, format, mapper) records both the
 * input-format and the mapper class for each path, retrievable afterwards
 * through getInputFormatMap and getMapperTypeMap.
 */
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class,
      MapClass.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class,
      MapClass2.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> mappers =
      MultipleInputs.getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
  assertEquals(MapClass.class, mappers.get(fooPath));
  assertEquals(MapClass2.class, mappers.get(barPath));
}
Example #5
Source File: TestMultipleInputs.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Verifies that addInputPath(conf, path, format) records the input-format
 * class for each path, retrievable afterwards through getInputFormatMap.
 */
public void testAddInputPathWithFormat() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
}
Example #6
Source File: TestStreamJob.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Verifies that StreamJob.createJob honors -inputformat, and that supplying
 * an -inputreader forces StreamInputFormat regardless of the named format.
 */
@Test
public void testCreateJob() throws IOException {
  final ArrayList<String> baseArgs = new ArrayList<String>();
  baseArgs.add("-input");
  baseArgs.add("dummy");
  baseArgs.add("-output");
  baseArgs.add("dummy");
  baseArgs.add("-mapper");
  baseArgs.add("dummy");
  baseArgs.add("-reducer");
  baseArgs.add("dummy");

  // A plain -inputformat selects exactly the named class.
  ArrayList<String> argList = new ArrayList<String>(baseArgs);
  argList.add("-inputformat");
  argList.add("org.apache.hadoop.mapred.KeyValueTextInputFormat");
  JobConf job = StreamJob.createJob(argList.toArray(new String[] {}));
  assertEquals(KeyValueTextInputFormat.class, job.getInputFormat().getClass());

  argList = new ArrayList<String>(baseArgs);
  argList.add("-inputformat");
  argList.add("org.apache.hadoop.mapred.SequenceFileInputFormat");
  job = StreamJob.createJob(argList.toArray(new String[] {}));
  assertEquals(SequenceFileInputFormat.class, job.getInputFormat().getClass());

  // Adding -inputreader overrides the named format with StreamInputFormat.
  argList = new ArrayList<String>(baseArgs);
  argList.add("-inputformat");
  argList.add("org.apache.hadoop.mapred.KeyValueTextInputFormat");
  argList.add("-inputreader");
  argList.add("StreamXmlRecordReader,begin=<doc>,end=</doc>");
  job = StreamJob.createJob(argList.toArray(new String[] {}));
  assertEquals(StreamInputFormat.class, job.getInputFormat().getClass());
}
Example #7
Source File: TestMultipleInputs.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Verifies that addInputPath(conf, path, format) records the input-format
 * class for each path, retrievable afterwards through getInputFormatMap.
 */
public void testAddInputPathWithFormat() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
}
Example #8
Source File: TestStreamJob.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Verifies that StreamJob.createJob honors -inputformat, and that supplying
 * an -inputreader forces StreamInputFormat regardless of the named format.
 */
@Test
public void testCreateJob() throws IOException {
  final ArrayList<String> baseArgs = new ArrayList<String>();
  baseArgs.add("-input");
  baseArgs.add("dummy");
  baseArgs.add("-output");
  baseArgs.add("dummy");
  baseArgs.add("-mapper");
  baseArgs.add("dummy");
  baseArgs.add("-reducer");
  baseArgs.add("dummy");

  // A plain -inputformat selects exactly the named class.
  ArrayList<String> argList = new ArrayList<String>(baseArgs);
  argList.add("-inputformat");
  argList.add("org.apache.hadoop.mapred.KeyValueTextInputFormat");
  JobConf job = StreamJob.createJob(argList.toArray(new String[] {}));
  assertEquals(KeyValueTextInputFormat.class, job.getInputFormat().getClass());

  argList = new ArrayList<String>(baseArgs);
  argList.add("-inputformat");
  argList.add("org.apache.hadoop.mapred.SequenceFileInputFormat");
  job = StreamJob.createJob(argList.toArray(new String[] {}));
  assertEquals(SequenceFileInputFormat.class, job.getInputFormat().getClass());

  // Adding -inputreader overrides the named format with StreamInputFormat.
  argList = new ArrayList<String>(baseArgs);
  argList.add("-inputformat");
  argList.add("org.apache.hadoop.mapred.KeyValueTextInputFormat");
  argList.add("-inputreader");
  argList.add("StreamXmlRecordReader,begin=<doc>,end=</doc>");
  job = StreamJob.createJob(argList.toArray(new String[] {}));
  assertEquals(StreamInputFormat.class, job.getInputFormat().getClass());
}
Example #9
Source File: MapOperator.java From attic-apex-malhar with Apache License 2.0 | 5 votes |
private InputSplit[] getSplits(JobConf conf, int numSplits, String path) throws Exception { FileInputFormat.setInputPaths(conf, new Path(path)); if (inputFormat == null) { inputFormat = inputFormatClass.newInstance(); String inputFormatClassName = inputFormatClass.getName(); if (inputFormatClassName.equals("org.apache.hadoop.mapred.TextInputFormat")) { ((TextInputFormat)inputFormat).configure(conf); } else if (inputFormatClassName.equals("org.apache.hadoop.mapred.KeyValueTextInputFormat")) { ((KeyValueTextInputFormat)inputFormat).configure(conf); } } return inputFormat.getSplits(conf, numSplits); // return null; }
Example #10
Source File: TestMultipleInputs.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Verifies that addInputPath(conf, path, format) records the input-format
 * class for each path, retrievable afterwards through getInputFormatMap.
 */
public void testAddInputPathWithFormat() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
}
Example #11
Source File: CompositeUserJoin.java From hadoop-map-reduce-patterns with Apache License 2.0 | 5 votes |
@Override public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); } Path userPath = new Path(args[0]); Path commentPath = new Path(args[1]); Path outputDir = new Path(args[2]); String joinType = args[3]; JobConf conf = new JobConf("CompositeJoin"); conf.setJarByClass(CompositeUserJoin.class); conf.setMapperClass(CompositeMapper.class); conf.setNumReduceTasks(0); // Set the input format class to a CompositeInputFormat class. // The CompositeInputFormat will parse all of our input files and output // records to our mapper. conf.setInputFormat(CompositeInputFormat.class); // The composite input format join expression will set how the records // are going to be read in, and in what input format. conf.set("mapred.join.expr", CompositeInputFormat.compose(joinType, KeyValueTextInputFormat.class, userPath, commentPath)); TextOutputFormat.setOutputPath(conf, outputDir); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); RunningJob job = JobClient.runJob(conf); while (!job.isComplete()) { Thread.sleep(1000); } return job.isSuccessful() ? 0 : 1; }
Example #12
Source File: TestMultipleInputs.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/**
 * Verifies that addInputPath(conf, path, format) records the input-format
 * class for each path, retrievable afterwards through getInputFormatMap.
 */
public void testAddInputPathWithFormat() {
  final JobConf conf = new JobConf();
  final Path fooPath = new Path("/foo");
  final Path barPath = new Path("/bar");

  MultipleInputs.addInputPath(conf, fooPath, TextInputFormat.class);
  MultipleInputs.addInputPath(conf, barPath, KeyValueTextInputFormat.class);

  final Map<Path, InputFormat> formats = MultipleInputs.getInputFormatMap(conf);

  assertEquals(TextInputFormat.class, formats.get(fooPath).getClass());
  assertEquals(KeyValueTextInputFormat.class, formats.get(barPath).getClass());
}
Example #13
Source File: TestDelegatingInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
public void testSplitting() throws Exception { JobConf conf = new JobConf(); MiniDFSCluster dfs = null; try { dfs = new MiniDFSCluster.Builder(conf).numDataNodes(4) .racks(new String[] { "/rack0", "/rack0", "/rack1", "/rack1" }) .hosts(new String[] { "host0", "host1", "host2", "host3" }) .build(); FileSystem fs = dfs.getFileSystem(); Path path = getPath("/foo/bar", fs); Path path2 = getPath("/foo/baz", fs); Path path3 = getPath("/bar/bar", fs); Path path4 = getPath("/bar/baz", fs); final int numSplits = 100; MultipleInputs.addInputPath(conf, path, TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, path2, TextInputFormat.class, MapClass2.class); MultipleInputs.addInputPath(conf, path3, KeyValueTextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, path4, TextInputFormat.class, MapClass2.class); DelegatingInputFormat inFormat = new DelegatingInputFormat(); InputSplit[] splits = inFormat.getSplits(conf, numSplits); int[] bins = new int[3]; for (InputSplit split : splits) { assertTrue(split instanceof TaggedInputSplit); final TaggedInputSplit tis = (TaggedInputSplit) split; int index = -1; if (tis.getInputFormatClass().equals(KeyValueTextInputFormat.class)) { // path3 index = 0; } else if (tis.getMapperClass().equals(MapClass.class)) { // path index = 1; } else { // path2 and path4 index = 2; } bins[index]++; } // Each bin is a unique combination of a Mapper and InputFormat, and // DelegatingInputFormat should split each bin into numSplits splits, // regardless of the number of paths that use that Mapper/InputFormat for (int count : bins) { assertEquals(numSplits, count); } assertTrue(true); } finally { if (dfs != null) { dfs.shutdown(); } } }
Example #14
Source File: TestDelegatingInputFormat.java From big-c with Apache License 2.0 | 4 votes |
public void testSplitting() throws Exception { JobConf conf = new JobConf(); MiniDFSCluster dfs = null; try { dfs = new MiniDFSCluster.Builder(conf).numDataNodes(4) .racks(new String[] { "/rack0", "/rack0", "/rack1", "/rack1" }) .hosts(new String[] { "host0", "host1", "host2", "host3" }) .build(); FileSystem fs = dfs.getFileSystem(); Path path = getPath("/foo/bar", fs); Path path2 = getPath("/foo/baz", fs); Path path3 = getPath("/bar/bar", fs); Path path4 = getPath("/bar/baz", fs); final int numSplits = 100; MultipleInputs.addInputPath(conf, path, TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, path2, TextInputFormat.class, MapClass2.class); MultipleInputs.addInputPath(conf, path3, KeyValueTextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, path4, TextInputFormat.class, MapClass2.class); DelegatingInputFormat inFormat = new DelegatingInputFormat(); InputSplit[] splits = inFormat.getSplits(conf, numSplits); int[] bins = new int[3]; for (InputSplit split : splits) { assertTrue(split instanceof TaggedInputSplit); final TaggedInputSplit tis = (TaggedInputSplit) split; int index = -1; if (tis.getInputFormatClass().equals(KeyValueTextInputFormat.class)) { // path3 index = 0; } else if (tis.getMapperClass().equals(MapClass.class)) { // path index = 1; } else { // path2 and path4 index = 2; } bins[index]++; } // Each bin is a unique combination of a Mapper and InputFormat, and // DelegatingInputFormat should split each bin into numSplits splits, // regardless of the number of paths that use that Mapper/InputFormat for (int count : bins) { assertEquals(numSplits, count); } assertTrue(true); } finally { if (dfs != null) { dfs.shutdown(); } } }