org.apache.hadoop.io.compress.DeflateCodec Java Examples

The following examples show how to use org.apache.hadoop.io.compress.DeflateCodec. You can vote up the examples you find useful or vote down the ones you don't, and you can follow the links above each example to visit the original project or source file. You may also check out the related API usage on the sidebar.
Example #1
Source File: BaseHdfsTargetIT.java    From datacollector with Apache License 2.0 6 votes vote down vote up
@Test
public void testCustomCompressionCodec() throws Exception {
  // Build a target configured to use an explicitly named codec class (OTHER mode).
  HdfsDTarget hdfsDTarget = new ForTestHdfsTarget();
  configure(hdfsDTarget);
  hdfsDTarget.hdfsTargetConfigBean.compression = CompressionMode.OTHER;
  hdfsDTarget.hdfsTargetConfigBean.otherCompression = DeflateCodec.class.getName();
  HdfsTarget hdfsTarget = (HdfsTarget) hdfsDTarget.createTarget();
  try {
    // Initialize with a throw-away context; the codec class name is resolved during init().
    Target.Context targetContext = ContextInfoCreator.createTargetContext(
        HdfsDTarget.class, "n", false, OnRecordError.TO_ERROR, null);
    hdfsTarget.init(null, targetContext);
    // The configured class name must resolve to the DeflateCodec implementation.
    Assert.assertEquals(DeflateCodec.class, hdfsTarget.getCompressionCodec().getClass());
  } finally {
    hdfsTarget.destroy();
  }
}
 
Example #2
Source File: TestInsertQuery.java    From tajo with Apache License 2.0 6 votes vote down vote up
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  // Creates a deflate-compressed table, runs INSERT OVERWRITE into it, then
  // verifies both the catalog row count and that every output file is
  // recognized as DeflateCodec-compressed by its file-name suffix.
  String tableName = IdentifierUtil.normalizeIdentifier("testInsertOverwriteWithCompression");
  // try-with-resources closes the ResultSet even if an assertion below throws
  // (the original leaked it on failure).
  try (ResultSet res = executeFile("testInsertOverwriteWithCompression_ddl.sql")) {
    // DDL statement; nothing to read from the result.
  }

  CatalogService catalog = testingCluster.getMaster().getCatalog();
  assertTrue(catalog.existsTable(getCurrentDatabase(), tableName));

  try (ResultSet res = executeQuery()) {
    // INSERT OVERWRITE; result rows are not inspected.
  }
  TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), tableName);
  // HiveCatalogStore does not track row statistics, so skip the count there.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(2, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());

  // Every file written under the table directory must carry the deflate codec.
  for (FileStatus file : fs.listStatus(new Path(desc.getUri()))) {
    CompressionCodec codec = factory.getCodec(file.getPath());
    assertTrue(codec instanceof DeflateCodec);
  }
  // Close the DROP result too, matching the other tests in this file.
  executeString("DROP TABLE " + tableName + " PURGE").close();
}
 
Example #3
Source File: TestInsertQuery.java    From tajo with Apache License 2.0 6 votes vote down vote up
@Test
public final void testInsertOverwriteLocationWithCompression() throws Exception {
  // This scenario is skipped when HiveCatalogStore is active.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    // try-with-resources guarantees the ResultSet is closed even when an
    // assertion below throws (the original leaked it on failure).
    try (ResultSet res = executeQuery()) {
      // INSERT OVERWRITE INTO LOCATION; no rows to read.
    }
    FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
    Path path = new Path("/tajo-data/testInsertOverwriteLocationWithCompression");
    assertTrue(fs.exists(path));
    // Exactly one output file is expected for this query.
    assertEquals(1, fs.listStatus(path).length);

    // Each output file must be recognized as deflate-compressed by its suffix.
    CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());
    for (FileStatus file : fs.listStatus(path)){
      CompressionCodec codec = factory.getCodec(file.getPath());
      assertTrue(codec instanceof DeflateCodec);
    }
  }
}
 
Example #4
Source File: TestInsertQuery.java    From incubator-tajo with Apache License 2.0 6 votes vote down vote up
@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  // Creates a csv table configured with DeflateCodec, inserts a filtered slice
  // of lineitem, and verifies row statistics plus per-file compression.
  String tableName = "testInsertOverwriteWithCompression";
  // try-with-resources closes each ResultSet even if an assertion throws
  // (the original leaked them on failure).
  try (ResultSet res = tpch.execute("create table " + tableName + " (col1 int4, col2 int4, col3 float8) USING csv WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec')")) {
    // DDL statement; nothing to read.
  }
  TajoTestingCluster cluster = tpch.getTestingCluster();
  CatalogService catalog = cluster.getMaster().getCatalog();
  assertTrue(catalog.existsTable(tableName));

  try (ResultSet res = tpch.execute("insert overwrite into " + tableName + " select  l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey = 3")) {
    // INSERT OVERWRITE; result rows are not inspected.
  }
  TableDesc desc = catalog.getTableDesc(tableName);
  assertEquals(2, desc.getStats().getNumRows().intValue());

  FileSystem fs = FileSystem.get(tpch.getTestingCluster().getConfiguration());
  assertTrue(fs.exists(desc.getPath()));
  CompressionCodecFactory factory = new CompressionCodecFactory(tpch.getTestingCluster().getConfiguration());

  // Every output file must be recognized as deflate-compressed by its suffix.
  for (FileStatus file : fs.listStatus(desc.getPath())){
    CompressionCodec codec = factory.getCodec(file.getPath());
    assertTrue(codec instanceof DeflateCodec);
  }
}
 
Example #5
Source File: TestInsertQuery.java    From tajo with Apache License 2.0 5 votes vote down vote up
@Test
public final void testInsertOverwritePathWithNonFromQuery() throws Exception {
  // INSERT OVERWRITE INTO LOCATION from a constant (FROM-less) select, then
  // verify the single output file is deflate-compressed and decodes back to
  // the expected delimited row.
  // try-with-resources closes the ResultSet even if a later step throws
  // (the original leaked it on failure).
  try (ResultSet res = executeString("insert overwrite into location " +
      "'/tajo-data/testInsertOverwritePathWithNonFromQuery' " +
      "USING text WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
      "select 1::INT4, 2.1::FLOAT4, 'test'")) {
    // No rows to read.
  }
  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  Path path = new Path("/tajo-data/testInsertOverwritePathWithNonFromQuery");
  assertTrue(fs.exists(path));
  // The constant query produces exactly one file.
  assertEquals(1, fs.listStatus(path).length);

  CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());
  FileStatus file = fs.listStatus(path)[0];
  CompressionCodec codec = factory.getCodec(file.getPath());
  assertTrue(codec instanceof DeflateCodec);

  // Decompress the file and check the written values round-trip.
  try (BufferedReader reader = new BufferedReader(
          new InputStreamReader(codec.createInputStream(fs.open(file.getPath()))))) {
    String line = reader.readLine();
    assertNotNull(line);

    String[] tokens = line.split("\\|");

    assertEquals(3, tokens.length);
    assertEquals("1", tokens[0]);
    assertEquals("2.1", tokens[1]);
    assertEquals("test", tokens[2]);
  }
}
 
Example #6
Source File: TestTablePartitions.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
@Test
public final void testColumnPartitionedTableByOneColumnsWithCompression() throws Exception {
  // Creates a table partitioned by one column with deflate compression, loads
  // it from lineitem, then checks row stats, the expected partition
  // directories, and that every data file inside them is compressed.
  String tableName = "testColumnPartitionedTableByOneColumnsWithCompression";
  ResultSet res = executeString(
      "create table " + tableName + " (col2 int4, col3 float8) USING csv " +
          "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
          "PARTITION BY column(col1 int4)");
  res.close();
  assertTrue(catalog.existsTable(tableName));

  // The last select column (l_orderkey) feeds the partition column col1.
  res = executeString(
      "insert overwrite into " + tableName + " select l_partkey, l_quantity, l_orderkey from lineitem");
  res.close();
  TableDesc desc = catalog.getTableDesc(tableName);
  assertEquals(5, desc.getStats().getNumRows().intValue());

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(desc.getPath()));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // lineitem's l_orderkey values 1..3 must each yield a partition directory.
  Path path = desc.getPath();
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));

  // Each file under each partition must be recognized as deflate-compressed.
  for (FileStatus partition : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition.getPath()));
    for (FileStatus file : fs.listStatus(partition.getPath())) {
      CompressionCodec codec = factory.getCodec(file.getPath());
      assertTrue(codec instanceof DeflateCodec);
    }
  }
}
 
Example #7
Source File: TestInsertQuery.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
@Test
public final void testInsertOverwriteLocationWithCompression() throws Exception {
  // Write a deflate-compressed result set directly into an HDFS location,
  // then verify a single compressed output file was produced.
  ResultSet resultSet = tpch.execute(
      "insert overwrite into location '/tajo-data/testInsertOverwriteLocationWithCompression' " +
          "USING csv WITH ('csvfile.delimiter'='|'," +
          "'compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
          "select * from lineitem where l_orderkey = 3");
  resultSet.close();

  FileSystem fileSystem = FileSystem.get(tpch.getTestingCluster().getConfiguration());
  Path outputPath = new Path("/tajo-data/testInsertOverwriteLocationWithCompression");
  assertTrue(fileSystem.exists(outputPath));
  assertEquals(1, fileSystem.listStatus(outputPath).length);

  // Each file name must map back to the deflate codec via its suffix.
  CompressionCodecFactory codecFactory =
      new CompressionCodecFactory(tpch.getTestingCluster().getConfiguration());
  for (FileStatus outputFile : fileSystem.listStatus(outputPath)) {
    CompressionCodec resolvedCodec = codecFactory.getCodec(outputFile.getPath());
    assertTrue(resolvedCodec instanceof DeflateCodec);
  }
}
 
Example #8
Source File: TestTablePartitions.java    From tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableByOneColumnsWithCompression() throws Exception {
  // Exercises a one-column partitioned, deflate-compressed table through both
  // INSERT and CTAS paths (selected by nodeType), then validates row stats,
  // partition directories, per-file compression, and catalog metadata.
  ResultSet res = null;
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableByOneColumnsWithCompression");

  if (nodeType == NodeType.INSERT) {
    res = executeString(
      "create table " + tableName + " (col2 int4, col3 float8) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "PARTITION BY column(col1 int4)");
    res.close();
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // Last select column (l_orderkey) populates the partition column col1.
    res = executeString(
      "insert overwrite into " + tableName + " select l_partkey, l_quantity, l_orderkey from lineitem");
  } else {
    // CTAS path: create and populate in a single statement.
    res = executeString(
      "create table " + tableName + " (col2 int4, col3 float8) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "PARTITION BY column(col1 int4) as select l_partkey, l_quantity, l_orderkey from lineitem");
  }
  res.close();

  TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // HiveCatalogStore does not track row statistics, so skip the count there.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // Expected partition directories for the distinct l_orderkey values 1..3.
  Path path = new Path(desc.getUri());
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));

  // Every file under every partition must be recognized as deflate-compressed.
  for (FileStatus partition : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition.getPath()));
    for (FileStatus file : fs.listStatus(partition.getPath())) {
      CompressionCodec codec = factory.getCodec(file.getPath());
      assertTrue(codec instanceof DeflateCodec);
    }
  }

  // Cross-check the on-disk layout against the catalog's partition records.
  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1"},
    desc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
 
Example #9
Source File: TestTablePartitions.java    From tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableByTwoColumnsWithCompression() throws Exception {
  // Two-level (col1/col2) partitioned, deflate-compressed table, exercised via
  // INSERT or CTAS depending on nodeType; verifies stats, the nested partition
  // directory tree, per-file compression, and catalog partition metadata.
  ResultSet res = null;
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableByTwoColumnsWithCompression");

  if (nodeType == NodeType.INSERT) {
    res = executeString("create table " + tableName + " (col3 float8, col4 text) USING text " +
      "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
      "PARTITION by column(col1 int4, col2 int4)");
    res.close();

    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // The trailing select columns (l_orderkey, l_partkey) feed col1 and col2.
    res = executeString(
      "insert overwrite into " + tableName +
        " select  l_quantity, l_returnflag, l_orderkey, l_partkey from lineitem");
  } else {
    // CTAS path: create and populate in a single statement.
    res = executeString("create table " + tableName + " (col3 float8, col4 text) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "PARTITION by column(col1 int4, col2 int4) as select  l_quantity, l_returnflag, l_orderkey, " +
      "l_partkey from lineitem");
  }
  res.close();

  TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // HiveCatalogStore does not track row statistics, so skip the count there.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // Expected two-level partition directories derived from lineitem's keys.
  Path path = new Path(desc.getUri());
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));

  // Walk both directory levels; every leaf file must be deflate-compressed.
  for (FileStatus partition1 : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition1.getPath()));
    for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
      assertTrue(fs.isDirectory(partition2.getPath()));
      for (FileStatus file : fs.listStatus(partition2.getPath())) {
        CompressionCodec codec = factory.getCodec(file.getPath());
        assertTrue(codec instanceof DeflateCodec);
      }
    }
  }

  // Cross-check the on-disk layout against the catalog's partition records.
  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1", "col2"},
      desc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
 
Example #10
Source File: TestTablePartitions.java    From tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableByThreeColumnsWithCompression() throws Exception {
  // Three-level (col1/col2/col3) partitioned, deflate-compressed table via
  // INSERT or CTAS; verifies stats, the full partition tree, per-file
  // compression, partition-pruned query results, and catalog metadata.
  ResultSet res = null;
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableByThreeColumnsWithCompression");

  if (nodeType == NodeType.INSERT) {
    res = executeString(
      "create table " + tableName + " (col4 text) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "partition by column(col1 int4, col2 int4, col3 float8)");
    res.close();

    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // Trailing select columns feed col1, col2, col3 respectively.
    res = executeString(
      "insert overwrite into " + tableName +
        " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
  } else {
    // CTAS path: create and populate in a single statement.
    res = executeString("create table " + tableName + " (col4 text) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "partition by column(col1 int4, col2 int4, col3 float8) as select l_returnflag, l_orderkey, l_partkey, " +
      "l_quantity from lineitem");
  }
  res.close();

  TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // HiveCatalogStore does not track row statistics, so skip the count there.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // Expected three-level partition directories derived from lineitem's data.
  Path path = new Path(desc.getUri());
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk all three directory levels; every leaf file must be deflate-compressed.
  for (FileStatus partition1 : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition1.getPath()));
    for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
      assertTrue(fs.isDirectory(partition2.getPath()));
      for (FileStatus partition3 : fs.listStatus(partition2.getPath())) {
        assertTrue(fs.isDirectory(partition3.getPath()));
        for (FileStatus file : fs.listStatus(partition3.getPath())) {
          CompressionCodec codec = factory.getCodec(file.getPath());
          assertTrue(codec instanceof DeflateCodec);
        }
      }
    }
  }

  // Query with a predicate on a partition column; expect exactly two rows.
  res = executeString("select * from " + tableName + " where col2 = 2");

  // Expected (col1, col2) pairs keyed by col3 value for the col2 = 2 query.
  Map<Double, int []> resultRows1 = Maps.newHashMap();
  resultRows1.put(45.0d, new int[]{3, 2});
  resultRows1.put(38.0d, new int[]{2, 2});

  int i = 0;
  while (res.next()) {
    assertEquals(resultRows1.get(res.getDouble(4))[0], res.getInt(2));
    assertEquals(resultRows1.get(res.getDouble(4))[1], res.getInt(3));
    i++;
  }
  res.close();
  assertEquals(2, i);

  // Expected (col1, col2) pairs keyed by col3 for the compound predicate below.
  Map<Double, int []> resultRows2 = Maps.newHashMap();
  resultRows2.put(49.0d, new int[]{3, 3});
  resultRows2.put(45.0d, new int[]{3, 2});
  resultRows2.put(38.0d, new int[]{2, 2});

  res = executeString("select * from " + tableName + " where (col1 = 2 or col1 = 3) and col2 >= 2");
  i = 0;
  while(res.next()) {
    assertEquals(resultRows2.get(res.getDouble(4))[0], res.getInt(2));
    assertEquals(resultRows2.get(res.getDouble(4))[1], res.getInt(3));
    i++;
  }

  res.close();
  assertEquals(3, i);

  // Cross-check the on-disk layout against the catalog's partition records.
  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1", "col2", "col3"},
    desc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
 
Example #11
Source File: TestTablePartitions.java    From tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
  // Builds a three-level partitioned, deflate-compressed table, then queries a
  // partition value that does not exist (col2 = 9) and expects an empty result.
  ResultSet res = null;
  String tableName = IdentifierUtil.normalizeIdentifier("testColumnPartitionedTableNoMatchedPartition");

  if (nodeType == NodeType.INSERT) {
    res = executeString(
      "create table " + tableName + " (col4 text) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "partition by column(col1 int4, col2 int4, col3 float8)");
    res.close();

    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));

    // Trailing select columns feed the partition columns col1, col2, col3.
    res = executeString(
      "insert overwrite into " + tableName +
        " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
  } else {
    // CTAS path: create and populate in a single statement.
    res = executeString("create table " + tableName + " (col4 text) USING text " +
        "WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
        "partition by column(col1 int4, col2 int4, col3 float8) as select l_returnflag , l_orderkey, l_partkey, " +
      "l_quantity from lineitem");
  }
  res.close();

  TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
  // HiveCatalogStore does not track row statistics, so skip the count there.
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(8, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // Expected three-level partition directories derived from lineitem's data.
  Path path = new Path(desc.getUri());
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk all three directory levels; every leaf file must be deflate-compressed.
  for (FileStatus partition1 : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition1.getPath()));
    for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
      assertTrue(fs.isDirectory(partition2.getPath()));
      for (FileStatus partition3 : fs.listStatus(partition2.getPath())) {
        assertTrue(fs.isDirectory(partition3.getPath()));
        for (FileStatus file : fs.listStatus(partition3.getPath())) {
          CompressionCodec codec = factory.getCodec(file.getPath());
          assertTrue(codec instanceof DeflateCodec);
        }
      }
    }
  }

  // No partition directory matches col2 = 9, so the scan must return no rows.
  res = executeString("select * from " + tableName + " where col2 = 9");
  assertFalse(res.next());
  res.close();

  // Cross-check the on-disk layout against the catalog's partition records.
  verifyPartitionDirectoryFromCatalog(DEFAULT_DATABASE_NAME, tableName, new String[]{"col1", "col2", "col3"},
    desc.getStats().getNumRows());

  executeString("DROP TABLE " + tableName + " PURGE").close();
}
 
Example #12
Source File: TestTablePartitions.java    From incubator-tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableByTwoColumnsWithCompression() throws Exception {
  // Two-level (col1/col2) partitioned, deflate-compressed csv table; verifies
  // row stats, the nested partition directories, and per-file compression.
  String tableName = "testColumnPartitionedTableByTwoColumnsWithCompression";
  ResultSet res = executeString("create table " + tableName + " (col3 float8, col4 text) USING csv " +
      "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
      "PARTITION by column(col1 int4, col2 int4)");
  res.close();

  assertTrue(catalog.existsTable(tableName));

  // The trailing select columns (l_orderkey, l_partkey) feed col1 and col2.
  res = executeString(
      "insert overwrite into " + tableName +
          " select  l_quantity, l_returnflag, l_orderkey, l_partkey from lineitem");
  res.close();
  TableDesc desc = catalog.getTableDesc(tableName);
  assertEquals(5, desc.getStats().getNumRows().intValue());

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(desc.getPath()));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // Expected two-level partition directories derived from lineitem's keys.
  Path path = desc.getPath();
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));

  // Walk both directory levels; every leaf file must be deflate-compressed.
  for (FileStatus partition1 : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition1.getPath()));
    for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
      assertTrue(fs.isDirectory(partition2.getPath()));
      for (FileStatus file : fs.listStatus(partition2.getPath())) {
        CompressionCodec codec = factory.getCodec(file.getPath());
        assertTrue(codec instanceof DeflateCodec);
      }
    }
  }
}
 
Example #13
Source File: TestTablePartitions.java    From incubator-tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableByThreeColumnsWithCompression() throws Exception {
  String tableName = "testColumnPartitionedTableByThreeColumnsWithCompression";
  ResultSet res = executeString(
      "create table " + tableName + " (col4 text) USING csv " +
          "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
          "partition by column(col1 int4, col2 int4, col3 float8)");
  res.close();

  assertTrue(catalog.existsTable(tableName));

  res = executeString(
      "insert overwrite into " + tableName +
          " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
  res.close();
  TableDesc desc = catalog.getTableDesc(tableName);
  assertEquals(5, desc.getStats().getNumRows().intValue());

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(desc.getPath()));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  Path path = desc.getPath();
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));

  for (FileStatus partition1 : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition1.getPath()));
    for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
      assertTrue(fs.isDirectory(partition2.getPath()));
      for (FileStatus partition3 : fs.listStatus(partition2.getPath())) {
        assertTrue(fs.isDirectory(partition3.getPath()));
        for (FileStatus file : fs.listStatus(partition3.getPath())) {
          CompressionCodec codec = factory.getCodec(file.getPath());
          assertTrue(codec instanceof DeflateCodec);
        }
      }
    }
  }

  res = executeString("select * from " + tableName + " where col2 = 2");

  Map<Double, int []> resultRows1 = Maps.newHashMap();
  resultRows1.put(45.0d, new int[]{3, 2});
  resultRows1.put(38.0d, new int[]{2, 2});

  int i = 0;
  while (res.next()) {
    assertEquals(resultRows1.get(res.getDouble(4))[0], res.getInt(2));
    assertEquals(resultRows1.get(res.getDouble(4))[1], res.getInt(3));
    i++;
  }
  res.close();
  assertEquals(2, i);

  Map<Double, int []> resultRows2 = Maps.newHashMap();
  resultRows2.put(49.0d, new int[]{3, 3});
  resultRows2.put(45.0d, new int[]{3, 2});
  resultRows2.put(38.0d, new int[]{2, 2});

  res = executeString("select * from " + tableName + " where (col1 = 2 or col1 = 3) and col2 >= 2");
  i = 0;
  while(res.next()) {
    assertEquals(resultRows2.get(res.getDouble(4))[0], res.getInt(2));
    assertEquals(resultRows2.get(res.getDouble(4))[1], res.getInt(3));
    i++;
  }

  res.close();
  assertEquals(3, i);
}
 
Example #14
Source File: TestTablePartitions.java    From incubator-tajo with Apache License 2.0 4 votes vote down vote up
@Test
public final void testColumnPartitionedTableNoMatchedPartition() throws Exception {
  // Builds a three-level partitioned, deflate-compressed csv table, then
  // queries a partition value that does not exist (col2 = 9) and expects an
  // empty result set.
  String tableName = "testColumnPartitionedTableNoMatchedPartition";
  ResultSet res = executeString(
      "create table " + tableName + " (col4 text) USING csv " +
          "WITH ('csvfile.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
          "partition by column(col1 int4, col2 int4, col3 float8)");
  res.close();

  assertTrue(catalog.existsTable(tableName));

  // Trailing select columns feed the partition columns col1, col2, col3.
  res = executeString(
      "insert overwrite into " + tableName +
          " select l_returnflag , l_orderkey, l_partkey, l_quantity from lineitem");
  res.close();
  TableDesc desc = catalog.getTableDesc(tableName);
  assertEquals(5, desc.getStats().getNumRows().intValue());

  FileSystem fs = FileSystem.get(conf);
  assertTrue(fs.exists(desc.getPath()));
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  // Expected three-level partition directories derived from lineitem's data.
  Path path = desc.getPath();
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
  assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));

  // Walk all three directory levels; every leaf file must be deflate-compressed.
  for (FileStatus partition1 : fs.listStatus(path)){
    assertTrue(fs.isDirectory(partition1.getPath()));
    for (FileStatus partition2 : fs.listStatus(partition1.getPath())) {
      assertTrue(fs.isDirectory(partition2.getPath()));
      for (FileStatus partition3 : fs.listStatus(partition2.getPath())) {
        assertTrue(fs.isDirectory(partition3.getPath()));
        for (FileStatus file : fs.listStatus(partition3.getPath())) {
          CompressionCodec codec = factory.getCodec(file.getPath());
          assertTrue(codec instanceof DeflateCodec);
        }
      }
    }
  }

  // No partition directory matches col2 = 9, so the scan must return no rows.
  res = executeString("select * from " + tableName + " where col2 = 9");
  assertFalse(res.next());
  res.close();
}
 
Example #15
Source File: MergeSortRowIdMatcher.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
private static CompressionCodec getCodec(Configuration configuration) {
  if (ZlibFactory.isNativeZlibLoaded(configuration)) {
    return new GzipCodec();
  }
  return new DeflateCodec();
}