Java Code Examples for org.apache.flink.api.java.typeutils.TupleTypeInfo#getBasicTupleTypeInfo()
The following examples show how to use
org.apache.flink.api.java.typeutils.TupleTypeInfo#getBasicTupleTypeInfo().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AbstractSortMergeOuterJoinIteratorITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Per-test setup: builds the serializers and comparators for the two tuple inputs,
 * the pair comparator that relates them, and fresh memory / IO managers.
 */
@Before
public void beforeTest() {
    final ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.disableObjectReuse();

    // First input is (String, String), second input is (String, Integer).
    final TupleTypeInfo<Tuple2<String, String>> firstInputType =
            TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
    final TupleTypeInfo<Tuple2<String, Integer>> secondInputType =
            TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class);

    serializer1 = firstInputType.createSerializer(executionConfig);
    serializer2 = secondInputType.createSerializer(executionConfig);

    // Both comparators are created over field 0 of their respective tuple type.
    comparator1 = firstInputType.createComparator(
            new int[]{0}, new boolean[]{true}, 0, executionConfig);
    comparator2 = secondInputType.createComparator(
            new int[]{0}, new boolean[]{true}, 0, executionConfig);
    pairComp = new GenericPairComparator<>(comparator1, comparator2);

    this.memoryManager = new MemoryManager(MEMORY_SIZE, 1);
    this.ioManager = new IOManagerAsync();
}
Example 2
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind multiple map ops. */ @Test public void checkJoinWithReplicatedSourceInputBehindMultiMaps() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .mapPartition(new IdPMap()) .flatMap(new IdFlatMap()) .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 3
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests cross program with replicated data source. */ @Test public void checkCrossWithReplicatedSourceInput() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .cross(source2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when cross should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy(); ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2); }
Example 4
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Parses one record with quoted-string parsing enabled and an include mask of
 * {@code {true, false, true}}, then checks that the first and third columns of the
 * line end up in {@code f0} and {@code f1}.
 *
 * @throws Exception if the temp file cannot be written or the input format fails
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
    // NOTE(review): "[email protected]" looks like an obfuscation placeholder from the page this
    // snippet was copied from; the input literal and the assertion below agree, so it is kept.
    final String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|"
            + "\"Blahblah <[email protected]>\"|\"blaaa|\"blubb\"";

    final File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);

    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }

    TupleTypeInfo<Tuple2<String, String>> typeInfo =
            TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
    CsvInputFormat<Tuple2<String, String>> inputFormat =
            new TupleCsvInputFormat<Tuple2<String, String>>(
                    new Path(tempFile.toURI().toString()), typeInfo, new boolean[]{true, false, true});

    inputFormat.enableQuotedStringParsing('"');
    inputFormat.setFieldDelimiter("|");
    inputFormat.setDelimiter('\n');
    inputFormat.configure(new Configuration());

    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);

    Tuple2<String, String> record = inputFormat.nextRecord(new Tuple2<String, String>());

    assertEquals("20:41:52-1-3-2015", record.f0);
    assertEquals("Blahblah <[email protected]>", record.f1);
}
Example 5
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Parses one record with quoted-string parsing enabled and an include mask of
 * {@code {true, false, true}}, then checks that the first and third columns of the
 * line end up in {@code f0} and {@code f1}.
 *
 * @throws Exception if the temp file cannot be written or the input format fails
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
    // NOTE(review): "[email protected]" looks like an obfuscation placeholder from the page this
    // snippet was copied from; the input literal and the assertion below agree, so it is kept.
    final String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|"
            + "\"Blahblah <[email protected]>\"|\"blaaa|\"blubb\"";

    final File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);

    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }

    TupleTypeInfo<Tuple2<String, String>> typeInfo =
            TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
    CsvInputFormat<Tuple2<String, String>> inputFormat =
            new TupleCsvInputFormat<Tuple2<String, String>>(
                    new Path(tempFile.toURI().toString()), typeInfo, new boolean[]{true, false, true});

    inputFormat.enableQuotedStringParsing('"');
    inputFormat.setFieldDelimiter("|");
    inputFormat.setDelimiter('\n');
    inputFormat.configure(new Configuration());

    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);

    Tuple2<String, String> record = inputFormat.nextRecord(new Tuple2<String, String>());

    assertEquals("20:41:52-1-3-2015", record.f0);
    assertEquals("Blahblah <[email protected]>", record.f1);
}
Example 6
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source. */ @Test public void checkJoinWithReplicatedSourceInput() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 7
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Reads a pipe-delimited file while projecting only the columns at positions 0, 3,
 * and 7, and verifies the three integers of each of the two records.
 */
@Test
public void testReadSparseWithPositionSetter() throws IOException {
    try {
        final String fileContent =
                "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
        final CsvInputFormat<Tuple3<Integer, Integer, Integer>> csvFormat =
                new TupleCsvInputFormat<>(PATH, tupleType, new int[]{0, 3, 7});

        csvFormat.setFieldDelimiter("|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first line
        Tuple3<Integer, Integer, Integer> record = csvFormat.nextRecord(new Tuple3<>());
        assertNotNull(record);
        assertEquals(Integer.valueOf(111), record.f0);
        assertEquals(Integer.valueOf(444), record.f1);
        assertEquals(Integer.valueOf(888), record.f2);

        // second line
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals(Integer.valueOf(0), record.f0);
        assertEquals(Integer.valueOf(777), record.f1);
        assertEquals(Integer.valueOf(333), record.f2);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 8
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Reads a file delimited by {@code "|x|"} using a boolean include mask that keeps the
 * columns at positions 0, 3, and 7, and verifies the three integers of both records.
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
    try {
        final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n"
                + "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
        final boolean[] includedColumns = {true, false, false, true, false, false, false, true};
        final CsvInputFormat<Tuple3<Integer, Integer, Integer>> csvFormat =
                new TupleCsvInputFormat<>(PATH, tupleType, includedColumns);

        csvFormat.setFieldDelimiter("|x|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first line
        Tuple3<Integer, Integer, Integer> record = csvFormat.nextRecord(new Tuple3<>());
        assertNotNull(record);
        assertEquals(Integer.valueOf(111), record.f0);
        assertEquals(Integer.valueOf(444), record.f1);
        assertEquals(Integer.valueOf(888), record.f2);

        // second line
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals(Integer.valueOf(0), record.f0);
        assertEquals(Integer.valueOf(777), record.f1);
        assertEquals(Integer.valueOf(333), record.f2);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 9
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind flatMap. */ @Test public void checkJoinWithReplicatedSourceInputBehindFlatMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .flatMap(new IdFlatMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 10
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map partition. */ @Test public void checkJoinWithReplicatedSourceInputBehindMapPartition() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .mapPartition(new IdPMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 11
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Reads a file delimited by {@code "|x|"} using a boolean include mask that keeps the
 * columns at positions 0, 3, and 7, and verifies the three integers of both records.
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
    try {
        final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n"
                + "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
        final boolean[] includedColumns = {true, false, false, true, false, false, false, true};
        final CsvInputFormat<Tuple3<Integer, Integer, Integer>> csvFormat =
                new TupleCsvInputFormat<>(PATH, tupleType, includedColumns);

        csvFormat.setFieldDelimiter("|x|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first line
        Tuple3<Integer, Integer, Integer> record = csvFormat.nextRecord(new Tuple3<>());
        assertNotNull(record);
        assertEquals(Integer.valueOf(111), record.f0);
        assertEquals(Integer.valueOf(444), record.f1);
        assertEquals(Integer.valueOf(888), record.f2);

        // second line
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals(Integer.valueOf(0), record.f0);
        assertEquals(Integer.valueOf(777), record.f1);
        assertEquals(Integer.valueOf(333), record.f2);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 12
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Reads a file delimited by {@code "|x|"} using a boolean include mask that keeps the
 * columns at positions 0, 3, and 7, and verifies the three integers of both records.
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
    try {
        final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n"
                + "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
        final boolean[] includedColumns = {true, false, false, true, false, false, false, true};
        final CsvInputFormat<Tuple3<Integer, Integer, Integer>> csvFormat =
                new TupleCsvInputFormat<>(PATH, tupleType, includedColumns);

        csvFormat.setFieldDelimiter("|x|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first line
        Tuple3<Integer, Integer, Integer> record = csvFormat.nextRecord(new Tuple3<>());
        assertNotNull(record);
        assertEquals(Integer.valueOf(111), record.f0);
        assertEquals(Integer.valueOf(444), record.f1);
        assertEquals(Integer.valueOf(888), record.f2);

        // second line
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals(Integer.valueOf(0), record.f0);
        assertEquals(Integer.valueOf(777), record.f1);
        assertEquals(Integer.valueOf(333), record.f2);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 13
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map partition. */ @Test public void checkJoinWithReplicatedSourceInputBehindMapPartition() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .mapPartition(new IdPMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 14
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Reads two pipe-delimited lines of five integers each into a {@code Tuple5} and
 * verifies every field, then checks that the input is exhausted.
 */
@Test
public void testIntegerFields() throws IOException {
    try {
        final String fileContent = "111|222|333|444|555\n666|777|888|999|000|\n";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple5<Integer, Integer, Integer, Integer, Integer>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(
                        Integer.class, Integer.class, Integer.class, Integer.class, Integer.class);
        final CsvInputFormat<Tuple5<Integer, Integer, Integer, Integer, Integer>> csvFormat =
                new TupleCsvInputFormat<>(PATH, tupleType);

        csvFormat.setFieldDelimiter("|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first line
        Tuple5<Integer, Integer, Integer, Integer, Integer> record =
                csvFormat.nextRecord(new Tuple5<>());
        assertNotNull(record);
        assertEquals(Integer.valueOf(111), record.f0);
        assertEquals(Integer.valueOf(222), record.f1);
        assertEquals(Integer.valueOf(333), record.f2);
        assertEquals(Integer.valueOf(444), record.f3);
        assertEquals(Integer.valueOf(555), record.f4);

        // second line
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals(Integer.valueOf(666), record.f0);
        assertEquals(Integer.valueOf(777), record.f1);
        assertEquals(Integer.valueOf(888), record.f2);
        assertEquals(Integer.valueOf(999), record.f3);
        assertEquals(Integer.valueOf(0), record.f4);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 15
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testTailingEmptyFields() throws Exception { final String fileContent = "aa,bb,cc\n" + // ok "aa,bb,\n" + // the last field is empty "aa,,\n" + // the last two fields are empty ",,\n" + // all fields are empty "aa,bb"; // row too short final FileInputSplit split = createTempFile(fileContent); final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class); final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<Tuple3<String, String, String>>(PATH, typeInfo); format.setFieldDelimiter(","); format.configure(new Configuration()); format.open(split); Tuple3<String, String, String> result = new Tuple3<String, String, String>(); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("cc", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("bb", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("aa", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); result = format.nextRecord(result); assertNotNull(result); assertEquals("", result.f0); assertEquals("", result.f1); assertEquals("", result.f2); try { format.nextRecord(result); fail("Parse Exception was not thrown! (Row too short)"); } catch (ParseException e) {} }
Example 16
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Verifies that lines starting with the multi-character comment prefix {@code "//"}
 * are skipped: only the two data lines are returned, after which the format yields
 * {@code null}.
 */
@Test
public void ignoreMultiCharPrefixComments() {
    try {
        final String fileContent = "//description of the data\n"
                + "//successive commented line\n"
                + "this is|1|2.0|\n"
                + "a test|3|4.0|\n"
                + "//next|5|6.0|\n";
        final FileInputSplit split = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo =
                TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
        final CsvInputFormat<Tuple3<String, Integer, Double>> format =
                new TupleCsvInputFormat<Tuple3<String, Integer, Double>>(PATH, "\n", "|", typeInfo);
        format.setCommentPrefix("//");

        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);

        Tuple3<String, Integer, Double> result = new Tuple3<String, Integer, Double>();

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.f0);
        assertEquals(Integer.valueOf(1), result.f1);
        // Double.valueOf replaces the deprecated new Double(..) boxing constructor
        assertEquals(Double.valueOf(2.0), result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.f0);
        assertEquals(Integer.valueOf(3), result.f1);
        assertEquals(Double.valueOf(4.0), result.f2);

        // the trailing "//next..." line is a comment, so no third record exists
        result = format.nextRecord(result);
        assertNull(result);
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 17
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Reads three string columns separated by the two-character delimiter {@code "|-"},
 * including rows with empty fields and a row ending in a trailing delimiter.
 */
@Test
public void readStringFieldsWithTrailingDelimiters() {
    try {
        final String fileContent = "abc|-def|-ghijk\nabc|-|-hhg\n|-|-|-\n";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<String, String, String>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
        final CsvInputFormat<Tuple3<String, String, String>> csvFormat =
                new TupleCsvInputFormat<>(PATH, tupleType);

        csvFormat.setFieldDelimiter("|-");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first line: all three fields populated
        Tuple3<String, String, String> record = csvFormat.nextRecord(new Tuple3<>());
        assertNotNull(record);
        assertEquals("abc", record.f0);
        assertEquals("def", record.f1);
        assertEquals("ghijk", record.f2);

        // second line: middle field empty
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals("abc", record.f0);
        assertEquals("", record.f1);
        assertEquals("hhg", record.f2);

        // third line: every field empty
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals("", record.f0);
        assertEquals("", record.f1);
        assertEquals("", record.f2);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 18
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Reads three pipe-delimited string columns, including rows with empty fields, and
 * checks that the format reports the end of input after the third record.
 */
@Test
public void readStringFields() {
    try {
        final String fileContent = "abc|def|ghijk\nabc||hhg\n|||";
        final FileInputSplit inputSplit = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<String, String, String>> tupleType =
                TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
        final CsvInputFormat<Tuple3<String, String, String>> csvFormat =
                new TupleCsvInputFormat<>(PATH, "\n", "|", tupleType);

        final Configuration configuration = new Configuration();
        csvFormat.configure(configuration);
        csvFormat.open(inputSplit);

        // first line: all three fields populated
        Tuple3<String, String, String> record = csvFormat.nextRecord(new Tuple3<>());
        assertNotNull(record);
        assertEquals("abc", record.f0);
        assertEquals("def", record.f1);
        assertEquals("ghijk", record.f2);

        // second line: middle field empty
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals("abc", record.f0);
        assertEquals("", record.f1);
        assertEquals("hhg", record.f2);

        // third line: every field empty
        record = csvFormat.nextRecord(record);
        assertNotNull(record);
        assertEquals("", record.f0);
        assertEquals("", record.f1);
        assertEquals("", record.f2);

        // input exhausted
        record = csvFormat.nextRecord(record);
        assertNull(record);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 19
Source File: CsvInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Verifies that lines starting with the multi-character comment prefix {@code "//"}
 * are skipped: only the two data lines are returned, after which the format yields
 * {@code null}.
 */
@Test
public void ignoreMultiCharPrefixComments() {
    try {
        final String fileContent = "//description of the data\n"
                + "//successive commented line\n"
                + "this is|1|2.0|\n"
                + "a test|3|4.0|\n"
                + "//next|5|6.0|\n";
        final FileInputSplit split = createTempFile(fileContent);

        final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo =
                TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
        final CsvInputFormat<Tuple3<String, Integer, Double>> format =
                new TupleCsvInputFormat<Tuple3<String, Integer, Double>>(PATH, "\n", "|", typeInfo);
        format.setCommentPrefix("//");

        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);

        Tuple3<String, Integer, Double> result = new Tuple3<String, Integer, Double>();

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.f0);
        assertEquals(Integer.valueOf(1), result.f1);
        // Double.valueOf replaces the deprecated new Double(..) boxing constructor
        assertEquals(Double.valueOf(2.0), result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.f0);
        assertEquals(Integer.valueOf(3), result.f1);
        assertEquals(Double.valueOf(4.0), result.f2);

        // the trailing "//next..." line is a comment, so no third record exists
        result = format.nextRecord(result);
        assertNull(result);
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
Example 20
Source File: CsvInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) { File tempFile = null; String fileContent = CsvInputFormatTest.FIRST_PART + lineBreakerInFile + CsvInputFormatTest.SECOND_PART + lineBreakerInFile; try { // create input file tempFile = File.createTempFile("CsvInputFormatTest", "tmp"); tempFile.deleteOnExit(); tempFile.setWritable(true); OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile)); wrt.write(fileContent); wrt.close(); final TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); final CsvInputFormat<Tuple1<String>> inputFormat = new TupleCsvInputFormat<Tuple1<String>>(new Path(tempFile.toURI().toString()), typeInfo); Configuration parameters = new Configuration(); inputFormat.configure(parameters); inputFormat.setDelimiter(lineBreakerSetup); FileInputSplit[] splits = inputFormat.createInputSplits(1); inputFormat.open(splits[0]); Tuple1<String> result = inputFormat.nextRecord(new Tuple1<String>()); assertNotNull("Expecting to not return null", result); assertEquals(FIRST_PART, result.f0); result = inputFormat.nextRecord(result); assertNotNull("Expecting to not return null", result); assertEquals(SECOND_PART, result.f0); } catch (Throwable t) { System.err.println("test failed with exception: " + t.getMessage()); t.printStackTrace(System.err); fail("Test erroneous"); } }