org.kitesdk.data.PartitionStrategy Java Examples

The following examples show how to use org.kitesdk.data.PartitionStrategy. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TimeDomain.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the calendar partitioners of {@code strategy} that read from
 * {@code sourceName} and keeps them in the sequence given by {@code order}.
 *
 * @param strategy the partition strategy whose field partitioners are inspected
 * @param sourceName the source field whose calendar partitioners are kept
 */
public TimeDomain(PartitionStrategy strategy, String sourceName) {
  Map<Integer, CalendarFieldPartitioner> byCalendarField = Maps.newHashMap();
  for (FieldPartitioner candidate :
      Accessor.getDefault().getFieldPartitioners(strategy)) {
    // the strategy may partition on more than one source field; skip the
    // partitioners for other fields and the non-calendar ones
    if (!sourceName.equals(candidate.getSourceName()) ||
        !(candidate instanceof CalendarFieldPartitioner)) {
      continue;
    }
    CalendarFieldPartitioner calendar = (CalendarFieldPartitioner) candidate;
    byCalendarField.put(calendar.getCalendarField(), calendar);
  }

  // walk the calendar fields in order and keep the contiguous run that has
  // partitioners
  this.partitioners = Lists.newArrayList();
  for (int field : order) {
    CalendarFieldPartitioner match = byCalendarField.get(field);
    if (match != null) {
      partitioners.add(match);
    } else if (!partitioners.isEmpty()) {
      // no partitioner for the next field means everything below it is
      // included, e.g. yyyy/mm/dd partitioning accepts when field is hour
      break;
    }
  }
}
 
Example #2
Source File: TestCreatePartitionStrategyCommand.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the command with year/month/day/hour/minute partitions on
 * {@code created_at} and checks that the equivalent builder-made strategy is
 * the only thing reported to the console.
 */
@Test
public void testTime() throws Exception {
  // the strategy the command is expected to print
  PartitionStrategy expected = new PartitionStrategy.Builder()
      .year("created_at")
      .month("created_at")
      .day("created_at")
      .hour("created_at")
      .minute("created_at")
      .build();

  command.partitions = Lists.newArrayList(
      "created_at:year",
      "created_at:month",
      "created_at:day",
      "created_at:hour",
      "created_at:minute");
  command.run();

  verify(console).info(expected.toString(true));
  verifyNoMoreInteractions(console);
}
 
Example #3
Source File: TestCompatibilityChecks.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code Compatibility.checkStrategyUpdate} throws a
 * {@code ValidationException} when given an identity-partitioned strategy and
 * a dateFormat strategy that uses the same partition name.
 */
@Test
public void testUpdateNonProvided() {
  final PartitionStrategy provided = new PartitionStrategy.Builder()
      .identity("s", "part")
      .build();

  // the replacement is built inside run() so that any failure while building
  // it is also observed by assertThrows
  Runnable replaceWithDateFormat = new Runnable() {
    @Override
    public void run() {
      PartitionStrategy replacement = new PartitionStrategy.Builder()
          .dateFormat("l", "part", "yyyy-MM-dd")
          .build();
      Compatibility.checkStrategyUpdate(
          provided, replacement, PROVIDED_TEST_SCHEMA);
    }
  };

  TestHelpers.assertThrows("Should not allow replacing if not provided",
      ValidationException.class, replaceWithDateFormat);
}
 
Example #4
Source File: PartitionKey.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * <p>
 * Builds the partition key of an entity. When {@code reuseKey} is non-null it
 * is filled in and returned; otherwise a new key is allocated with one slot
 * per field partitioner of the strategy.
 * </p>
 * <p>
 * Each slot is computed by reading the partitioner's source field from the
 * entity through {@code accessor} and applying the partitioner to that value.
 * This is a convenient way to find the partition that a given entity is
 * written to, or to find a partition using objects from the entity domain.
 * </p>
 */
@SuppressWarnings("unchecked")
public static <E> PartitionKey partitionKeyForEntity(PartitionStrategy strategy,
    E entity, EntityAccessor<E> accessor, @Nullable PartitionKey reuseKey) {
  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);

  PartitionKey result;
  if (reuseKey != null) {
    result = reuseKey;
  } else {
    result = new PartitionKey(new Object[partitioners.size()]);
  }

  int position = 0;
  for (FieldPartitioner partitioner : partitioners) {
    Object sourceValue = accessor.get(entity, partitioner.getSourceName());
    result.set(position, partitioner.apply(sourceValue));
    position += 1;
  }
  return result;
}
 
Example #5
Source File: TestTableConversion.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Converts {@code TABLE} with a strategy that has a provided partition, a hash
 * on the nested field {@code inner.a}, and an identity on {@code str}, and
 * compares the result with a hand-built record schema named "test".
 */
@Test
public void testConvertTableWithRequiredFields() {
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .provided("not_present", "int")
      .hash("inner.a", 16) // requires both inner and inner.a
      .identity("str")
      .build();

  // nested record: required int "a" and optional bytes "b"
  Schema inner = Schema.createRecord("inner", null, null, false);
  inner.setFields(Lists.newArrayList(
      new Schema.Field("a", Schema.create(Schema.Type.INT), null, null),
      new Schema.Field("b",
          optional(Schema.create(Schema.Type.BYTES)), null, NULL_DEFAULT)));

  // outer record: required string "str" and the nested record
  Schema expected = Schema.createRecord("test", null, null, false);
  expected.setFields(Lists.newArrayList(
      new Schema.Field("str", Schema.create(Schema.Type.STRING), null, null),
      new Schema.Field("inner", inner, null, null)));

  Schema converted = HiveSchemaConverter.convertTable("test", TABLE, strategy);
  Assert.assertEquals("Should convert table named test", expected, converted);
}
 
Example #6
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Copies a single hash partition of the input dataset into the matching
 * partition of the output dataset through a Crunch pipeline and checks the
 * number of records that arrive.
 */
@Test
public void testPartitionedSourceAndTarget() throws IOException {
  PartitionStrategy partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();

  DatasetDescriptor inputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> inputDataset = repo.create("ns", "in", inputDescriptor);

  DatasetDescriptor outputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> outputDataset = repo.create("ns", "out", outputDescriptor);

  writeTestUsers(inputDataset, 10);

  // same key for both sides: read partition 0, write partition 0
  PartitionKey key = new PartitionKey(0);
  PartitionedDataset<Record> partitionedInput =
      (PartitionedDataset<Record>) inputDataset;
  Dataset<Record> inputPart0 = partitionedInput.getPartition(key, false);
  PartitionedDataset<Record> partitionedOutput =
      (PartitionedDataset<Record>) outputDataset;
  Dataset<Record> outputPart0 = partitionedOutput.getPartition(key, true);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data =
      pipeline.read(CrunchDatasets.asSource(inputPart0));
  pipeline.write(
      data, CrunchDatasets.asTarget(outputPart0), Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(5, datasetSize(outputPart0));
}
 
Example #7
Source File: TestDatasetWriterCacheLoader.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares a repository in a fresh temporary directory, creates the
 * hash-partitioned "users" dataset in it, and wraps that dataset in the view
 * used by the tests.
 */
@Before
public void setUp() throws IOException {
  this.conf = new Configuration();
  this.fileSystem = FileSystem.get(conf);
  this.testDirectory = new Path(Files.createTempDir().getAbsolutePath());

  EnusrePartitionPathDoesNotExistMetadataProvider metadataProvider =
      new EnusrePartitionPathDoesNotExistMetadataProvider(conf, testDirectory);
  this.repo = new FileSystemDatasetRepository(
      conf, testDirectory, metadataProvider);

  partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2)
      .build();
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();

  FileSystemDataset<Object> users =
      (FileSystemDataset<Object>) repo.create("ns", "users", descriptor);
  view = new FileSystemView<Object>(users, null, null, Object.class);
}
 
Example #8
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a view of the input dataset restricted to one username and appends it
 * to an unpartitioned Parquet output dataset, then checks the output size.
 */
@Test
public void testSourceView() throws IOException {
  PartitionStrategy partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();

  DatasetDescriptor inputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> inputDataset = repo.create("ns", "in", inputDescriptor);

  DatasetDescriptor outputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> outputDataset = repo.create("ns", "out", outputDescriptor);

  writeTestUsers(inputDataset, 10);

  // the source is narrowed to a single user before it is read
  View<Record> inputView = inputDataset.with("username", "test-0");
  Assert.assertEquals(1, datasetSize(inputView));

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data =
      pipeline.read(CrunchDatasets.asSource(inputView));
  pipeline.write(
      data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(1, datasetSize(outputDataset));
}
 
Example #9
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a single-username view of the input dataset and appends it to the
 * matching view of the output dataset, then checks the output dataset size.
 */
@Test
public void testTargetView() throws IOException {
  PartitionStrategy partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();

  DatasetDescriptor inputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> inputDataset = repo.create("ns", "in", inputDescriptor);

  DatasetDescriptor outputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> outputDataset = repo.create("ns", "out", outputDescriptor);

  writeTestUsers(inputDataset, 10);

  View<Record> inputView = inputDataset.with("username", "test-0");
  Assert.assertEquals(1, datasetSize(inputView));
  // the target is a view with the same constraint as the source
  View<Record> outputView = outputDataset.with("username", "test-0");

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data =
      pipeline.read(CrunchDatasets.asSource(inputView));
  pipeline.write(
      data, CrunchDatasets.asTarget(outputView), Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(1, datasetSize(outputDataset));
}
 
Example #10
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Same flow as the target-view test, but with a strategy made of a single
 * provided partition named "version": one user is written through the input
 * view and copied into the matching output view.
 */
@Test
public void testTargetViewProvidedPartition() throws IOException {
    PartitionStrategy partitionStrategy =
            new PartitionStrategy.Builder().provided("version").build();

    DatasetDescriptor inputDescriptor = new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .partitionStrategy(partitionStrategy)
            .build();
    Dataset<Record> inputDataset = repo.create("ns", "in", inputDescriptor);

    DatasetDescriptor outputDescriptor = new DatasetDescriptor.Builder()
            .schema(USER_SCHEMA)
            .partitionStrategy(partitionStrategy)
            .build();
    Dataset<Record> outputDataset = repo.create("ns", "out", outputDescriptor);

    // records are written through the view, not through the dataset
    View<Record> inputView = inputDataset.with("version", "test-version-0");
    writeTestUsers(inputView, 1);
    Assert.assertEquals(1, datasetSize(inputView));

    View<Record> outputView = outputDataset.with("version", "test-version-0");

    Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
    PCollection<GenericData.Record> data =
            pipeline.read(CrunchDatasets.asSource(inputView));
    pipeline.write(
            data, CrunchDatasets.asTarget(outputView), Target.WriteMode.APPEND);
    pipeline.run();

    Assert.assertEquals(1, datasetSize(outputDataset));
}
 
Example #11
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Addresses the input and output datasets by URI instead of by instance and
 * checks that all ten written users arrive in the output dataset.
 */
@Test
public void testDatasetUris() throws IOException {
  PartitionStrategy partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();

  DatasetDescriptor inputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> inputDataset = repo.create("ns", "in", inputDescriptor);

  DatasetDescriptor outputDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> outputDataset = repo.create("ns", "out", outputDescriptor);

  writeTestUsers(inputDataset, 10);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  // source and target are both resolved from URIs built on the repo URI
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(
          new URIBuilder(repo.getUri(), "ns", "in").build(),
          GenericData.Record.class));
  pipeline.write(
      data,
      CrunchDatasets.asTarget(
          new URIBuilder(repo.getUri(), "ns", "out").build()),
      Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(10, datasetSize(outputDataset));
}
 
Example #12
Source File: TestPartitionStrategyParser.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the parser against identity partitioners, once with an explicit
 * partition name and once with the name left out of the JSON.
 */
@Test
public void testIdentity() {
  // right now, the field type is taken from the Schema
  PartitionStrategy explicitName = new PartitionStrategy.Builder()
      .identity("username", "id")
      .build();
  String explicitNameJson = "[ {\"type\": \"identity\", " +
      "\"source\": \"username\", " +
      "\"name\": \"id\"} ]";
  checkParser(explicitName, explicitNameJson);

  // no "name" in the JSON: compared against the "username_copy" partition
  PartitionStrategy implicitName = new PartitionStrategy.Builder()
      .identity("username", "username_copy")
      .build();
  String implicitNameJson =
      "[ {\"type\": \"identity\", \"source\": \"username\"} ]";
  checkParser(implicitName, implicitNameJson);
}
 
Example #13
Source File: FileSystemUtil.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a strategy of provided partitions from the partition information
 * found under {@code location}.
 *
 * @param fs the file system that holds {@code location}
 * @param location the directory to inspect
 * @return the strategy, or {@code null} when the location does not exist or
 *         fewer than two entries of partition information are found
 * @throws IOException if the file system cannot be read
 */
public static PartitionStrategy strategy(FileSystem fs, Path location) throws IOException {
  if (!fs.exists(location)) {
    return null;
  }

  List<Pair<String, Class<? extends Comparable>>> pairs =
      visit(new GetPartitionInfo(), fs, location);

  // a single entry is only the containing directory, so nothing to partition
  if (pairs == null || pairs.size() <= 1) {
    return null;
  }

  PartitionStrategy.Builder builder = new PartitionStrategy.Builder();

  // skip the initial partition because it is the containing directory
  for (int index = 1; index < pairs.size(); index += 1) {
    Pair<String, Class<? extends Comparable>> pair = pairs.get(index);
    String name = pair.first();
    if (name == null) {
      // unnamed levels get a positional name
      name = "partition_" + index;
    }
    builder.provided(
        name, ProvidedFieldPartitioner.valuesString(pair.second()));
  }

  return builder.build();
}
 
Example #14
Source File: TestPartitionStrategyParser.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Embeds a strategy into a parsed schema and checks that the result reports
 * an embedded strategy and that parsing it back yields an equal strategy.
 */
@Test
public void testAddEmbeddedPartitionStrategy() {
  String userSchemaJson = "{" +
      "  \"type\": \"record\"," +
      "  \"name\": \"User\"," +
      "  \"fields\": [" +
      "    {\"name\": \"id\", \"type\": \"long\"}," +
      "    {\"name\": \"username\", \"type\": \"string\"}," +
      "    {\"name\": \"real_name\", \"type\": \"string\"}" +
      "  ]" +
      "}";

  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .hash("username", 16)
      .identity("username", "u")
      .build();
  Schema original = new Schema.Parser().parse(userSchemaJson);

  Schema embedded =
      PartitionStrategyParser.embedPartitionStrategy(original, strategy);

  Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(embedded));
  Assert.assertEquals(
      strategy, PartitionStrategyParser.parseFromSchema(embedded));
}
 
Example #15
Source File: AvroKeySerDe.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the schema and strategy and precomputes one record schema for every
 * proper prefix of the schema's fields.
 *
 * @param schema the full key schema
 * @param partitionStrategy the partition strategy kept alongside the schema
 */
public AvroKeySerDe(Schema schema, PartitionStrategy partitionStrategy) {
  this.schema = schema;
  this.partitionStrategy = partitionStrategy;

  List<Field> allFields = schema.getFields();
  int fieldSize = allFields.size();
  partialSchemas = new Schema[fieldSize];

  // prefixes of length 1 .. fieldSize - 1 only; the last slot of the array is
  // never assigned here
  for (int last = 0; last < fieldSize - 1; last++) {
    List<Field> prefix = new ArrayList<Field>();
    for (Field field : allFields.subList(0, last + 1)) {
      // each partial schema gets its own copies of the fields
      prefix.add(AvroUtils.cloneField(field));
    }
    partialSchemas[last] = Schema.createRecord(prefix);
  }
}
 
Example #16
Source File: EntityAccessor.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code reuse} with the partition values of {@code object}, one per
 * field partitioner of the key's own strategy, and returns it.
 *
 * @param object the entity to compute partition values for
 * @param provided optional values passed through to the per-partitioner lookup
 * @param reuse the key to overwrite; must not be null
 * @return {@code reuse}, after all positions have been replaced
 */
public StorageKey keyFor(E object, @Nullable Map<String, Object> provided,
                         StorageKey reuse) {
  Preconditions.checkNotNull(reuse, "Cannot use null key");

  List<FieldPartitioner> partitioners = Accessor.getDefault()
      .getFieldPartitioners(reuse.getPartitionStrategy());

  int position = 0;
  for (FieldPartitioner partitioner : partitioners) {
    reuse.replace(position, partitionValue(object, provided, partitioner));
    position += 1;
  }
  return reuse;
}
 
Example #17
Source File: TestCompatibilityChecks.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code Compatibility.checkDescriptor} throws a
 * {@code ValidationException} for partition names that contain '-' or '.'.
 */
@Test
public void testIllegalPartitionNames() {
  // no need to check sources because '.' and '-' aren't allowed in schemas
  Runnable checkDashedName = new Runnable() {
    @Override
    public void run() {
      PartitionStrategy dashed = new PartitionStrategy.Builder()
          .identity("day_of_month", "day-of-month")
          .build();
      Compatibility.checkDescriptor(
          new DatasetDescriptor.Builder()
              .schema(schema)
              .partitionStrategy(dashed)
              .build());
    }
  };
  TestHelpers.assertThrows("Should reject '-' in partition name",
      ValidationException.class, checkDashedName);

  Runnable checkDottedName = new Runnable() {
    @Override
    public void run() {
      PartitionStrategy dotted = new PartitionStrategy.Builder()
          .identity("number", "day.of.month")
          .build();
      Compatibility.checkDescriptor(
          new DatasetDescriptor.Builder()
              .schema(schema)
              .partitionStrategy(dotted)
              .build());
    }
  };
  TestHelpers.assertThrows("Should reject '.' in partition name",
      ValidationException.class, checkDottedName);
}
 
Example #18
Source File: TestPartitionStrategyParser.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the parser against hash partitioners with and without an explicit
 * name, and checks that a missing or non-numeric bucket count is rejected
 * with a {@code ValidationException}.
 */
@Test
public void testHash() {
  PartitionStrategy unnamed =
      new PartitionStrategy.Builder().hash("id", 64).build();
  checkParser(unnamed,
      "[ {\"type\": \"hash\", \"source\": \"id\", \"buckets\": 64} ]");

  PartitionStrategy named =
      new PartitionStrategy.Builder().hash("id", "h", 64).build();
  String namedJson = "[ {\"type\": \"hash\", " +
      "\"source\": \"id\", " +
      "\"name\": \"h\", " +
      "\"buckets\": 64} ]";
  checkParser(named, namedJson);

  // "buckets" left out entirely
  Runnable parseWithoutBuckets = new Runnable() {
    @Override
    public void run() {
      PartitionStrategyParser.parse("[ {\"type\": \"hash\", " +
          "\"source\": \"id\", " +
          "\"name\": \"h\"} ]");
    }
  };
  TestHelpers.assertThrows("Should reject missing buckets",
      ValidationException.class, parseWithoutBuckets);

  // "buckets" present but not a number
  Runnable parseWithTextBuckets = new Runnable() {
    @Override
    public void run() {
      PartitionStrategyParser.parse("[ {\"type\": \"hash\", " +
          "\"source\": \"id\", " +
          "\"name\": \"h\", " +
          "\"buckets\": \"green\"} ]");
    }
  };
  TestHelpers.assertThrows("Should reject invalid buckets",
      ValidationException.class, parseWithTextBuckets);
}
 
Example #19
Source File: PartitionStrategyParser.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new schema equal to {@code schema} with the JSON form of
 * {@code strategy} stored under the {@code PARTITIONS} property.
 *
 * @param schema the schema to copy
 * @param strategy the partition strategy to embed
 * @return a schema re-parsed from the augmented JSON
 */
public static Schema embedPartitionStrategy(Schema schema, PartitionStrategy strategy) {
  // TODO: avoid embedding strategies in the schema
  // Avro considers Props read-only and uses an older Jackson version, so the
  // schema goes through its String form (Avro uses com.codehaus.jackson)
  ObjectNode json = JsonUtil.parse(schema.toString(), ObjectNode.class);
  json.set(PARTITIONS, toJson(strategy));

  String withStrategy = json.toString();
  return new Schema.Parser().parse(withStrategy);
}
 
Example #20
Source File: TestFileSystemUtil.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Writes Parquet event files under two sub-paths ("part" and "2") of a nested
 * folder and checks the single dataset descriptor discovered from the root:
 * location, schema, format, and a provided string partition named
 * "partition_1".
 */
@Test
public void testMultipleParquetFilesInSeparateFolders() throws Exception {
  File folder = temp.newFolder("a/b/c/d/e");
  Path root = new Path(temp.getRoot().toURI());
  FileSystem fs = LocalFileSystem.getInstance();

  // create two Parquet event files under separate folders
  Path parent = new Path(folder.toURI());
  createParquetEventFile(fs, new Path(parent, "part"));
  createParquetEventFile(fs, new Path(parent, "2"));

  // exactly one potential dataset is expected under the root
  DatasetDescriptor descriptor = Iterables.getOnlyElement(
      FileSystemUtil.findPotentialDatasets(fs, root));

  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .provided("partition_1", "string")
      .build();

  Assert.assertFalse("Should not flag at mixed depth",
      descriptor.hasProperty("kite.filesystem.mixed-depth"));
  Assert.assertEquals("Should be directly under parent",
      parent.toUri(), descriptor.getLocation());
  Assert.assertEquals("Should use user schema",
      EVENT_SCHEMA, descriptor.getSchema());
  Assert.assertEquals("Should have Parquet format",
      Formats.PARQUET, descriptor.getFormat());
  // message matches the expected strategy above (previously said part=int)
  Assert.assertEquals("Should be partitioned by partition_1=string",
      strategy, descriptor.getPartitionStrategy());
}
 
Example #21
Source File: TestPathConversion.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the path {@code year=2013/month=11/day=5} to a storage key and
 * compares it with a key holding the values 2013, 11 and 5.
 */
@Test
@SuppressWarnings("unchecked")
public void testToKey() {
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();

  StorageKey expected = new StorageKey(strategy);
  expected.replaceValues((List) Lists.newArrayList(2013, 11, 5));

  Path partitionPath = new Path("year=2013/month=11/day=5");
  StorageKey reusable = new StorageKey(strategy);
  StorageKey actual = convert.toKey(partitionPath, reusable);

  Assert.assertEquals(expected, actual);
}
 
Example #22
Source File: TestFileSystemDatasetRepository.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dataset with one hash partitioner, tries to update it to a
 * descriptor with an extra hash partitioner, and checks that the update is
 * rejected and the stored strategy is unchanged.
 */
@Test
public void testUpdateFailsWithPartitionStrategyChange() {
  PartitionStrategy original = new PartitionStrategy.Builder()
      .hash("username", 2)
      .build();
  PartitionStrategy extended = new PartitionStrategy.Builder()
      .hash("username", 2)
      .hash("email", 3)
      .build();

  DatasetDescriptor initial = new DatasetDescriptor.Builder(testDescriptor)
      .partitionStrategy(original)
      .build();
  Dataset<Record> dataset = repo.create(NAMESPACE, NAME, initial);

  DatasetDescriptor changed =
      new DatasetDescriptor.Builder(dataset.getDescriptor())
          .partitionStrategy(extended)
          .build();

  boolean rejected = false;
  try {
    repo.update(NAMESPACE, NAME, changed);
  } catch (ValidationException e) {
    // expected
    rejected = true;
  }
  Assert.assertTrue("Should fail due to partition strategy change", rejected);

  // the failed update must not have replaced the stored strategy
  Assert.assertEquals(
      original,
      repo.load(NAMESPACE, NAME).getDescriptor().getPartitionStrategy());
}
 
Example #23
Source File: TestSimpleView.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Writes three events to a dataset partitioned by identity on
 * {@code user_id} and checks which of them a view refined to
 * {@code user_id == 0L} contains.
 */
@Test
public void testRefineIdentity() throws Exception {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
            .identity("user_id")
            .build();
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schemaUri("resource:standard_event.avsc")
            .partitionStrategy(strategy)
            .build();

    // Create a separate dataset to avoid conflicts with the above.
    Dataset<StandardEvent> identityDataset =
            repo.create("ns", "test_identity", descriptor);

    DatasetWriter<StandardEvent> writer = identityDataset.newWriter();
    try {
        writer.write(sepEvent);
        writer.write(octEvent);
        writer.write(novEvent);
    } finally {
        Closeables.close(writer, false);
    }

    assertContentEquals(
            Sets.newHashSet(sepEvent, novEvent),
            identityDataset.with("user_id", 0L));
}
 
Example #24
Source File: Constraints.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the given references as-is; no copies are made and nothing is
 * validated here.
 *
 * @param schema the schema kept in this instance
 * @param strategy the partition strategy kept in this instance
 * @param constraints the predicates by name, kept by reference
 * @param provided the provided values by name, kept by reference
 */
private Constraints(Schema schema, PartitionStrategy strategy,
                    Map<String, Predicate> constraints,
                    Map<String, Object> provided) {
  // maps first, then the schema/strategy pair; the assignments are independent
  this.constraints = constraints;
  this.provided = provided;
  this.strategy = strategy;
  this.schema = schema;
}
 
Example #25
Source File: AvroKeyEntitySchemaParser.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code rawSchema} together with {@code partitionStrategy} through a
 * dataset descriptor and wraps the descriptor's schema and strategy in an
 * {@code AvroKeySchema}.
 */
@Override
public AvroKeySchema parseKeySchema(String rawSchema,
    PartitionStrategy partitionStrategy) {
  // use DatasetDescriptor.Builder because it checks consistency
  DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();
  DatasetDescriptor checked = builder
      .schemaLiteral(rawSchema)
      .partitionStrategy(partitionStrategy)
      .build();

  Schema keySchema = checked.getSchema();
  return new AvroKeySchema(keySchema, checked.getPartitionStrategy());
}
 
Example #26
Source File: TestDateFormatPartitioner.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a dateFormat strategy to its expression form and back and checks
 * that the result equals the starting strategy.
 */
@Test
public void testExpressionRoundTrip() {
  Accessor accessor = Accessor.getDefault();
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .dateFormat("timestamp", "day", "yyyy-MM-dd")
      .build();

  PartitionStrategy copy =
      accessor.fromExpression(accessor.toExpression(strategy));

  Assert.assertEquals(strategy, copy);
}
 
Example #27
Source File: TestMetadataProviders.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the configuration depending on {@code distributed}, builds two
 * unrelated descriptors (Avro events by date, Parquet records by hash), and
 * creates the provider under test.
 */
@Before
public void setUp() throws IOException, URISyntaxException {
  if (distributed) {
    this.conf = MiniDFSTest.getConfiguration();
  } else {
    this.conf = new Configuration();
  }

  PartitionStrategy byDate = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();
  this.testDescriptor = new DatasetDescriptor.Builder()
      .format(Formats.AVRO)
      .schema(SchemaBuilder.record("Event").fields()
          .requiredLong("timestamp")
          .requiredString("message")
          .endRecord())
      .partitionStrategy(byDate)
      .build();

  // something completely different
  PartitionStrategy byHash = new PartitionStrategy.Builder()
      .hash("some_field", 20000)
      .build();
  this.anotherDescriptor = new DatasetDescriptor.Builder()
      .format(Formats.PARQUET)
      .schema(SchemaBuilder.record("Record").fields()
          .requiredBytes("some_field")
          .requiredString("another_field")
          .endRecord())
      .partitionStrategy(byHash)
      .build();

  this.provider = newProvider(conf);
}
 
Example #28
Source File: StorageKey.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a key for {@code strategy} backed directly by {@code values}.
 *
 * @param strategy the partition strategy this key belongs to
 * @param values the key values, stored by reference; must have exactly as
 *               many entries as the field map loaded for {@code strategy}
 * @throws IllegalArgumentException if the number of values differs from the
 *                                  number of fields
 * @throws RuntimeException if the field map cannot be loaded from the cache;
 *                          the underlying failure is attached as the cause
 */
private StorageKey(PartitionStrategy strategy, List<Object> values) {
  try {
    this.fields = FIELD_CACHE.get(strategy);
  } catch (ExecutionException ex) {
    // keep the caught exception as the cause so the real failure inside the
    // cache loader is not lost from the stack trace
    throw new RuntimeException("[BUG] Could not get field map", ex);
  }
  Preconditions.checkArgument(values.size() == fields.size(),
      "Not enough values for a complete StorageKey");
  this.strategy = strategy;
  this.values = values;
  this.path = null;
}
 
Example #29
Source File: TestPartitionStrategyParser.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the parser against minute partitioners, with and without an
 * explicit partition name.
 */
@Test
public void testMinute() {
  PartitionStrategy unnamed =
      new PartitionStrategy.Builder().minute("time").build();
  String unnamedJson = "[ {\"type\": \"minute\", \"source\": \"time\"} ]";
  checkParser(unnamed, unnamedJson);

  PartitionStrategy named =
      new PartitionStrategy.Builder().minute("time", "m").build();
  String namedJson =
      "[ {\"type\": \"minute\", \"source\": \"time\", \"name\": \"m\"} ]";
  checkParser(named, namedJson);
}
 
Example #30
Source File: Constraints.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns an {@code EntityPredicate} over the given predicates when the
 * schema is a record, and an always-true predicate for any other schema type.
 */
private static <E> Predicate<E> entityPredicate(
    Map<String, Predicate> predicates, Schema schema,
    EntityAccessor<E> accessor,
    PartitionStrategy strategy) {
  boolean isRecord = (Schema.Type.RECORD == schema.getType());
  if (isRecord) {
    return new EntityPredicate<E>(predicates, schema, accessor, strategy);
  }
  // non-record schemas are not checked by these predicates
  return alwaysTrue();
}