Java Code Examples for org.kitesdk.data.DatasetReader#close()
The following examples show how to use org.kitesdk.data.DatasetReader#close(). They are drawn from open source projects; the source file and project are noted above each example so you can consult the original in context.
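All of the examples share one pattern: obtain a DatasetReader from a dataset or view, iterate over it, and close the reader in a finally block so the underlying files or scanners are released even if iteration fails. Here is a minimal sketch of that pattern; the dataset URI and the use of GenericRecord are placeholders for illustration, not taken from any example below.

import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Datasets;

public class CloseReaderSketch {
  public static void main(String[] args) {
    // Hypothetical URI; substitute a real dataset location.
    Dataset<GenericRecord> dataset = Datasets.load(
        "dataset:hdfs:/tmp/data/example", GenericRecord.class);

    DatasetReader<GenericRecord> reader = null;
    try {
      reader = dataset.newReader();
      for (GenericRecord record : reader) {
        System.out.println(record);
      }
    } finally {
      // Close unconditionally so file handles and scanners are released,
      // even when iteration throws.
      if (reader != null) {
        reader.close();
      }
    }
  }
}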
Example 1
Source File: TestParquetImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testOverrideTypeMapping() throws IOException {
  String [] types = { "INT" };
  String [] vals = { "10" };
  createTableWithColTypes(types, vals);

  String [] extraArgs = { "--map-column-java", "DATA_COL0=String"};
  runImport(getOutputArgv(true, extraArgs));

  Schema schema = getSchema();
  assertEquals(Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());
  checkField(fields.get(0), "DATA_COL0", Type.STRING);

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertEquals("DATA_COL0", "10", record1.get("DATA_COL0"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 2
Source File: TestParquetImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Schema schema = getSchema();
  assertEquals(Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());
  checkField(fields.get(0), "__NAME", Type.INT);

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertEquals("__NAME", 1987, record1.get("__NAME"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 3
Source File: TestParquetImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testNonIdentCharactersInColumnName() throws IOException {
  String [] names = { "test_p-a+r/quet" };
  String [] types = { "INT" };
  String [] vals = { "2015" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Schema schema = getSchema();
  assertEquals(Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());
  checkField(fields.get(0), "TEST_P_A_R_QUET", Type.INT);

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertEquals("TEST_P_A_R_QUET", 2015, record1.get("TEST_P_A_R_QUET"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 4
Source File: TestParquetImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testNullableParquetImport() throws IOException, SQLException {
  String [] types = { "INT" };
  String [] vals = { null };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertNull(record1.get("DATA_COL0"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 5
Source File: ReadDataset.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Load the events dataset
  Dataset<GenericRecord> events = Datasets.load("dataset:hive:/tmp/data/default/events");

  // Get a reader for the dataset and read all the events
  DatasetReader<GenericRecord> reader = events.newReader();
  try {
    for (GenericRecord event : reader) {
      System.out.println(event);
    }
  } finally {
    reader.close();
  }

  return 0;
}
Example 6
Source File: TestParquetImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testIncrementalParquetImport() throws IOException, SQLException {
  String [] types = { "INT" };
  String [] vals = { "1" };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));
  runImport(getOutputArgv(true, new String[]{"--append"}));

  DatasetReader<GenericRecord> reader = getReader();
  try {
    assertTrue(reader.hasNext());
    GenericRecord record1 = reader.next();
    assertEquals(1, record1.get("DATA_COL0"));
    record1 = reader.next();
    assertEquals(1, record1.get("DATA_COL0"));
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 7
Source File: ReadMovies.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Dataset<Record> movies = Datasets.load(
      "dataset:hdfs:/tmp/data/movies", Record.class);

  DatasetReader<Record> reader = null;
  try {
    reader = movies.newReader();
    for (Record rec : reader) {
      System.err.println("Movie: " + rec);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 8
Source File: UserProfileDatasetExample.java From kite with Apache License 2.0
/**
 * Print the user profiles and actions for all users with the provided last
 * name.
 *
 * This method demonstrates how to open a scanner with a start key. It's using
 * the composite dao, so the records it returns will be a composite of both
 * the profile model and actions model.
 *
 * @param lastName
 *          The last name of users to scan.
 */
public void printUserProfileActionsForLastName(String lastName) {
  // TODO: use a reader with a start key
  DatasetReader<UserProfileActionsModel2> reader =
      userProfileActionsDataset.newReader();
  try {
    for (UserProfileActionsModel2 entity : reader) {
      UserProfileModel2 userProfile = entity.getUserProfileModel();
      if (userProfile.getLastName().equals(lastName)) {
        System.out.println(entity.toString());
      }
    }
  } finally {
    // readers need to be closed.
    reader.close();
  }
}
Example 9
Source File: ReadProductDatasetPojo.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Load the products dataset
  Dataset<Product> products = Datasets.load(
      "dataset:hdfs:/tmp/data/products", Product.class);

  // Get a reader for the dataset and read all the products
  DatasetReader<Product> reader = null;
  try {
    reader = products.newReader();
    for (Product product : reader) {
      System.out.println(product);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 10
Source File: TestMapReduce.java From kite with Apache License 2.0
private void checkOutput(boolean existingPresent) {
  DatasetReader<GenericData.Record> reader = outputDataset.newReader();
  Map<String, Integer> counts = new HashMap<String, Integer>();
  for (GenericData.Record record : reader) {
    counts.put(record.get("name").toString(),
        (Integer) record.get("count"));
  }
  reader.close();

  Assert.assertEquals(3, counts.get("apple").intValue());
  Assert.assertEquals(2, counts.get("banana").intValue());
  Assert.assertEquals(1, counts.get("carrot").intValue());
  if (existingPresent) {
    Assert.assertEquals(4, counts.get("date").intValue());
  } else {
    Assert.assertNull(counts.get("date"));
  }
}
Example 11
Source File: TestFileSystemDataset.java From kite with Apache License 2.0
@SuppressWarnings("deprecation") private int readTestUsersInPartition(FileSystemDataset<Record> ds, PartitionKey key, String subpartitionName) { int readCount = 0; DatasetReader<Record> reader = null; try { PartitionedDataset<Record> partition = ds.getPartition(key, false); if (subpartitionName != null) { List<FieldPartitioner> fieldPartitioners = Accessor.getDefault().getFieldPartitioners(partition.getDescriptor() .getPartitionStrategy()); Assert.assertEquals(1, fieldPartitioners.size()); Assert.assertEquals(subpartitionName, fieldPartitioners.get(0) .getName()); } reader = partition.newReader(); for (GenericData.Record actualRecord : reader) { Assert.assertEquals(actualRecord.toString(), key.get(0), (actualRecord .get("username").hashCode() & Integer.MAX_VALUE) % 2); if (key.getLength() > 1) { Assert.assertEquals(key.get(1), (actualRecord.get("email").hashCode() & Integer.MAX_VALUE) % 3); } readCount++; } } finally { if (reader != null) { reader.close(); } } return readCount; }
Example 12
Source File: DaoViewTest.java From kite with Apache License 2.0
private void validRange(View<TestEntity> range, int startIdx, int endIdx) {
  int cnt = startIdx;
  DatasetReader<TestEntity> reader = range.newReader();
  try {
    for (TestEntity entity : reader) {
      Assert.assertEquals(Integer.toString(cnt), entity.getPart1());
      Assert.assertEquals(Integer.toString(cnt), entity.getPart2());
      cnt++;
    }
  } finally {
    reader.close();
  }
  Assert.assertEquals(endIdx, cnt);
}
Example 13
Source File: UserProfileDatasetExample.java From kite with Apache License 2.0
/**
 * Print all user profiles.
 *
 * This method demonstrates how to open a reader that will read the entire
 * table. It has no start or stop keys specified.
 */
public void printUserProfies() {
  DatasetReader<UserProfileModel2> reader = userProfileDataset.newReader();
  try {
    for (UserProfileModel2 userProfile : reader) {
      System.out.println(userProfile.toString());
    }
  } finally {
    // readers need to be closed.
    reader.close();
  }
}
Example 14
Source File: ReadUserDataset.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Load the users dataset
  // Dataset is named [table].[entity]
  RandomAccessDataset<User> users = Datasets.load(
      "dataset:hbase:quickstart.cloudera/users.User", User.class);

  // Get an accessor for the dataset and look up a user by username
  Key key = new Key.Builder(users).add("username", "bill").build();
  System.out.println(users.get(key));
  System.out.println("----");

  // Get a reader for the dataset and read the users from "bill" onwards
  DatasetReader<User> reader = null;
  try {
    reader = users.with("username", "bill").newReader();
    for (User user : reader) {
      System.out.println(user);
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  return 0;
}
Example 15
Source File: TestAllTables.java From aliyun-maxcompute-data-collectors with Apache License 2.0
public void testMultiTableImportAsParquetFormat() throws IOException {
  String [] argv = getArgv(new String[]{"--as-parquetfile"}, null);
  runImport(new ImportAllTablesTool(), argv);

  Path warehousePath = new Path(this.getWarehouseDir());
  int i = 0;
  for (String tableName : this.tableNames) {
    Path tablePath = new Path(warehousePath, tableName);
    Dataset dataset = Datasets.load("dataset:file:" + tablePath);

    // dequeue the expected value for this table. This
    // list has the same order as the tableNames list.
    String expectedVal = Integer.toString(i++) + ","
        + this.expectedStrings.get(0);
    this.expectedStrings.remove(0);

    DatasetReader<GenericRecord> reader = dataset.newReader();
    try {
      GenericRecord record = reader.next();
      String line = record.get(0) + "," + record.get(1);
      assertEquals("Table " + tableName + " expected a different string",
          expectedVal, line);
      assertFalse(reader.hasNext());
    } finally {
      reader.close();
    }
  }
}
Example 16
Source File: DaoViewTest.java From kite with Apache License 2.0
@Test
public void testRange() {
  populateTestEntities(10);

  final AbstractRefinableView<TestEntity> range =
      new DaoView<TestEntity>(ds, TestEntity.class)
          .fromAfter(NAMES[0], "1").to(NAMES[0], "9")
          .fromAfter(NAMES[1], "1").to(NAMES[1], "9");

  // Test entity range checks
  // Note that these are strings, not ints, so lexicographic ordering is used
  Assert.assertTrue(range.includes(newTestEntity("5", "5")));
  Assert.assertTrue(range.includes(newTestEntity("5", "55")));
  Assert.assertTrue(range.includes(newTestEntity("9", "89")));
  Assert.assertTrue(range.includes(newTestEntity("9", "9")));
  Assert.assertFalse(range.includes(newTestEntity("1", "1")));
  Assert.assertFalse(range.includes(newTestEntity("1", "0")));
  Assert.assertFalse(range.includes(newTestEntity("1", "10")));
  Assert.assertFalse(range.includes(newTestEntity("9", "99")));

  DatasetReader<TestEntity> reader = range.newReader();
  int cnt = 2;
  try {
    for (TestEntity entity : reader) {
      Assert.assertEquals(Integer.toString(cnt), entity.getPart1());
      Assert.assertEquals(Integer.toString(cnt), entity.getPart2());
      cnt++;
    }
  } finally {
    reader.close();
  }

  Assert.assertEquals(10, cnt);
}
Example 17
Source File: TestHiveImport.java From aliyun-maxcompute-data-collectors with Apache License 2.0
private void verifyHiveDataset(String tableName, Object[][] valsArray) {
  String datasetUri = String.format("dataset:hive:default/%s",
      tableName.toLowerCase());
  assertTrue(Datasets.exists(datasetUri));
  Dataset dataset = Datasets.load(datasetUri);
  assertFalse(dataset.isEmpty());

  DatasetReader<GenericRecord> reader = dataset.newReader();
  try {
    List<String> expectations = new ArrayList<String>();
    if (valsArray != null) {
      for (Object[] vals : valsArray) {
        expectations.add(Arrays.toString(vals));
      }
    }

    while (reader.hasNext() && expectations.size() > 0) {
      String actual = Arrays.toString(
          convertGenericRecordToArray(reader.next()));
      assertTrue("Expect record: " + actual, expectations.remove(actual));
    }
    assertFalse(reader.hasNext());
    assertEquals(0, expectations.size());
  } finally {
    reader.close();
  }
}
Example 18
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testUseReaderSchemaParquet() throws IOException {

  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the dataset
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .format(Formats.PARQUET).schema(oldRecordSchema).build());
  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .format(Formats.PARQUET).schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read data from updated dataset that has the new schema.
  // At this point, User class has the old schema
  PCollection<NewUserRecord> data = pipeline.read(CrunchDatasets.asSource(in.getUri(),
      NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(new UserRecordIdentityFn(),
      Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 19
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testUseReaderSchema() throws IOException {

  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the dataset
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read data from updated dataset that has the new schema.
  // At this point, User class has the old schema
  PCollection<NewUserRecord> data = pipeline.read(CrunchDatasets.asSource(in.getUri(),
      NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(new UserRecordIdentityFn(),
      Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
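A closing note: DatasetReader is declared Closeable in the Kite API, so on Java 7 and later the try/finally pattern used throughout these examples can also be written with try-with-resources, which invokes close() automatically. A minimal sketch under that assumption, reusing the hypothetical dataset URI from the opening sketch:

// Hypothetical URI; substitute a real dataset location.
Dataset<GenericRecord> dataset = Datasets.load(
    "dataset:hdfs:/tmp/data/example", GenericRecord.class);

try (DatasetReader<GenericRecord> reader = dataset.newReader()) {
  for (GenericRecord record : reader) {
    System.out.println(record);
  }
} // reader.close() runs automatically here, even if iteration throws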