Java Code Examples for org.kitesdk.data.DatasetWriter#write()

The following examples show how to use org.kitesdk.data.DatasetWriter#write(). Each example is drawn from an open source project; the source file and license are noted above the code.
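Before the examples, here is a minimal sketch of the pattern they all share, assuming an existing dataset and an Avro-generated Event class (the URI and the Event type are hypothetical). DatasetWriter implements Closeable, so where the examples below use explicit try/finally blocks, try-with-resources is an equivalent option:

Dataset<Event> events = Datasets.load("dataset:hdfs:/tmp/data/events", Event.class);
try (DatasetWriter<Event> writer = events.newWriter()) {
  // write() appends one entity; close() flushes remaining data and releases the file
  writer.write(event);
}
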
Example 1
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testSpecificProjectionLoad() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<SmallEvent> dataset = repo.load(
      "ns", unbounded.getDataset().getName(),
      SmallEvent.class);

  Set<SmallEvent> expected = Sets.newHashSet(toSmallEvent(sepEvent),
      toSmallEvent(octEvent), toSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example 2
Source File: DatasetTestUtilities.java    From kite with Apache License 2.0
public static void writeTestUsers(View<GenericData.Record> view, int count, int start, String... fields) {
  DatasetWriter<GenericData.Record> writer = null;
  try {
    writer = view.newWriter();
    for (int i = start; i < count + start; i++) {
      GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
          view.getDataset().getDescriptor().getSchema()).set("username", "test-" + i);
      for (String field : fields) {
        recordBuilder.set(field, field + "-" + i);
      }
      writer.write(recordBuilder.build());
    }
    if (writer instanceof Flushable) {
      ((Flushable) writer).flush();
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example 3
Source File: TestPartitionReplacement.java    From kite with Apache License 2.0
private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test-" + i;
      writer.write(record);
    }

  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example 4
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testMixedProjection() throws IOException {
  Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(), StandardEvent.class);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example 5
Source File: PartitionedDatasetWriter.java    From kite with Apache License 2.0
@Override
public void write(E entity) {
  Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to write to a writer in state:%s", state);

  accessor.keyFor(entity, provided, reusedKey);

  DatasetWriter<E> writer = cachedWriters.getIfPresent(reusedKey);
  if (writer == null) {
    // avoid checking on every write whether the entity belongs in the view
    // by only checking when a new writer is created
    Preconditions.checkArgument(view.includes(entity),
        "View %s does not include entity %s", view, entity);
    // copy the reused key because the cache will keep a reference to it
    StorageKey key = StorageKey.copy(reusedKey);
    try {
      writer = cachedWriters.getUnchecked(key);
    } catch (UncheckedExecutionException ex) {
      throw new IllegalArgumentException(
          "Problem creating view for entity: " + entity, ex.getCause());
    }
  }

  writer.write(entity);
}
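Example 5 depends on a cache of per-partition writers (cachedWriters). The sketch below illustrates that caching pattern with Guava's LoadingCache; it is illustrative only, and newWriterForKey() is a hypothetical factory standing in for the real PartitionedDatasetWriter wiring:

LoadingCache<StorageKey, DatasetWriter<E>> cachedWriters = CacheBuilder.newBuilder()
    .maximumSize(10)
    .removalListener(new RemovalListener<StorageKey, DatasetWriter<E>>() {
      @Override
      public void onRemoval(RemovalNotification<StorageKey, DatasetWriter<E>> n) {
        n.getValue().close(); // close each writer as it is evicted
      }
    })
    .build(new CacheLoader<StorageKey, DatasetWriter<E>>() {
      @Override
      public DatasetWriter<E> load(StorageKey key) {
        return newWriterForKey(key); // hypothetical: open a writer for this partition
      }
    });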
 
Example 6
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testReflectProjectionAsType() throws IOException {
  Dataset<StandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(),
      StandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.asType(ReflectStandardEvent.class).newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  final View<ReflectSmallEvent> smallEvents = original.asType(ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, smallEvents);

  TestHelpers.assertThrows("Should not be able to write small events",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          smallEvents.newWriter();
        }
      });
}
 
Example 7
Source File: TestFileSystemUtil.java    From kite with Apache License 2.0
public void writeUserToView(View<GenericRecord> dataset) {
  DatasetWriter<GenericRecord> writer = null;
  try {
    writer = dataset.newWriter();
    writer.write(USER);
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example 8
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testIncompatibleProjection() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  TestHelpers.assertThrows(
      "Should not load a dataset with an incompatible class",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          repo.load("ns", unbounded.getDataset().getName(),
              IncompatibleEvent.class);
        }
      });

  TestHelpers.assertThrows("Should reject a schema that can't read or write",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          unbounded.asType(IncompatibleEvent.class);
        }
      });

  TestHelpers.assertThrows("Should reject a schema that can't read or write",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          unbounded.getDataset().asType(IncompatibleEvent.class);
        }
      });
}
 
Example 9
Source File: TestFileSystemView.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testUnboundedMoveToTrash() throws Exception {
  // NOTE: this is an unrestricted write so all should succeed
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, oct, oct12, nov, nov11);

  Assert.assertTrue("Delete should return true to indicate data was deleted.",
      unbounded.moveToTrash());
  assertDirectoriesDoNotExist(fs, y2013, sep12, sep, oct12, oct, nov11, nov);
  assertDirectoriesExist(fs, root);
}
 
Example 10
Source File: TestFileSystemView.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testUnboundedDelete() throws Exception {
  // NOTE: this is an unrestricted write so all should succeed
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, oct, oct12, nov, nov11);

  Assert.assertTrue("Delete should return true to indicate data was deleted.",
      unbounded.deleteAll());
  assertDirectoriesDoNotExist(fs, y2013, sep12, sep, oct12, oct, nov11, nov);
  assertDirectoriesExist(fs, root);
}
 
Example 11
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testReflectProjectionLoad() throws IOException {
  Dataset<ReflectStandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(ReflectStandardEvent.class)
          .build(),
      ReflectStandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  View<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example 12
Source File: TestCrunchDatasetsHBase.java    From kite with Apache License 2.0
private void writeRecords(Dataset<GenericRecord> dataset, int count) {
  DatasetWriter<GenericRecord> writer = dataset.newWriter();
  try {
    for (int i = 0; i < count; ++i) {
      GenericRecord entity = HBaseDatasetRepositoryTest.createGenericEntity(i);
      writer.write(entity);
    }
  } finally {
    writer.close();
  }
}
 
Example 13
Source File: TestPartitionedDatasetWriter.java    From kite with Apache License 2.0
private static <E> void writeToView(View<E> view, E... entities) {
  DatasetWriter<E> writer = null;
  try {
    writer = view.newWriter();
    for (E entity : entities) {
      writer.write(entity);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
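A hypothetical call site for the helper above, with e1 and e2 standing in for real records:

writeToView(view, e1, e2);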
 
Example 14
Source File: DaoViewTest.java    From kite with Apache License 2.0
@Test
public void testLimitedWriter() {
  final View<TestEntity> range = ds
      .fromAfter(NAMES[0], "1").to(NAMES[0], "5")
      .fromAfter(NAMES[1], "1").to(NAMES[1], "5");
  DatasetWriter<TestEntity> writer = range.newWriter();
  try {
    writer.write(newTestEntity("3", "3"));
    writer.write(newTestEntity("5", "5"));
  } finally {
    writer.close();
  }
}
 
Example 15
Source File: TestSimpleView.java    From kite with Apache License 2.0
@Test
public void testRefineIdentity() throws Exception {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
            .identity("user_id")
            .build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schemaUri("resource:standard_event.avsc")
            .partitionStrategy(strategy)
            .build();

    // Create a separate dataset to avoid conflicts with the above.
    Dataset<StandardEvent> identityDataset = repo.create(
        "ns", "test_identity", descriptor);

    DatasetWriter<StandardEvent> writer = null;

    try {
        writer = identityDataset.newWriter();
        writer.write(sepEvent);
        writer.write(octEvent);
        writer.write(novEvent);
    } finally {
        Closeables.close(writer, false);
    }

    assertContentEquals(Sets.newHashSet(sepEvent, novEvent),
            identityDataset.with("user_id", 0L));
}
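As a follow-up to this example, a sketch of reading the refined view back. DatasetReader is both Iterable and Closeable, so the matching partition can be scanned with try-with-resources:

try (DatasetReader<StandardEvent> reader =
         identityDataset.with("user_id", 0L).newReader()) {
  for (StandardEvent event : reader) {
    // only events whose user_id is 0 are returned
  }
}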
 
Example 16
Source File: CreateProductDatasetPojo.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {

  // Create a dataset of products with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(Product.class)
      .build();
  Dataset<Product> products = Datasets.create(
      "dataset:hdfs:/tmp/data/products", descriptor, Product.class);

  // Get a writer for the dataset and write some products to it
  DatasetWriter<Product> writer = null;
  try {
    writer = products.newWriter();
    int i = 0;
    for (String name : names) {
      Product product = new Product();
      product.setName(name);
      product.setId(i++);
      writer.write(product);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
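A hedged companion to this example: Datasets.delete removes the dataset created above by URI, returning true if data was deleted:

boolean deleted = Datasets.delete("dataset:hdfs:/tmp/data/products");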
 
Example 17
Source File: CreateUserDatasetGenericParquet.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
Example 18
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test
public void testUseReaderSchemaParquet() throws IOException {

  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the input and output datasets
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .format(Formats.PARQUET).schema(oldRecordSchema).build());

  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .format(Formats.PARQUET).schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read the data back using the enhanced NewUserRecord schema;
  // the records on disk were written with the old schema
  PCollection<NewUserRecord> data = pipeline.read(CrunchDatasets.asSource(in.getUri(),
      NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(new UserRecordIdentityFn(),
      Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader<Record> reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
 
Example 19
Source File: TestMapReduce.java    From kite with Apache License 2.0
private void populateOutputDataset() {
  DatasetWriter<GenericData.Record> writer = outputDataset.newWriter();
  try {
    writer.write(newStatsRecord(4, "date"));
  } finally {
    writer.close();
  }
}
 
Example 20
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test
public void testUseReaderSchema() throws IOException {

  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the input and output datasets
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read the data back using the enhanced NewUserRecord schema;
  // the records on disk were written with the old schema
  PCollection<NewUserRecord> data = pipeline.read(CrunchDatasets.asSource(in.getUri(),
      NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(new UserRecordIdentityFn(),
      Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader<Record> reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}