Java Code Examples for org.pentaho.di.core.RowMetaAndData#setData()

The following examples show how to use org.pentaho.di.core.RowMetaAndData#setData(). They are taken from open-source projects; you can go to the original project or source file by following the link above each example.
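Before the examples, here is a minimal, illustrative sketch of the usual pattern (the field names and values are made up, and RowMeta, ValueMetaString and ValueMetaInteger come from org.pentaho.di.core.row and org.pentaho.di.core.row.value): build a RowMeta describing the columns, attach it with setRowMeta(), then pass the row values as an Object[] to setData(), one entry per value meta and in the same order.

// Minimal sketch: illustrative field names and values, not taken from the examples below
RowMeta rowMeta = new RowMeta();
rowMeta.addValueMeta( new ValueMetaString( "name" ) );
rowMeta.addValueMeta( new ValueMetaInteger( "age" ) );   // Kettle integers are backed by java.lang.Long

RowMetaAndData row = new RowMetaAndData();
row.setRowMeta( rowMeta );
row.setData( new Object[] { "Alice", 42L } );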
Example 1
Source File: MetaInjectTest.java    From pentaho-kettle with Apache License 2.0
private static RowMetaAndData createRowMetaAndData( ValueMetaInterface valueMeta, Object data ) {
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( valueMeta );
  row.setRowMeta( rowMeta );
  row.setData( new Object[] { data } );
  return row;
}
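For context, a call to this helper from within the test class might look like the following; the value meta and constant are illustrative and not lines taken from MetaInjectTest:

RowMetaAndData constantRow = createRowMetaAndData( new ValueMetaString( "CONSTANT" ), "someValue" );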
 
Example 2
Source File: ParquetConverter.java    From pentaho-hadoop-shims with Apache License 2.0
@Override
public void start() {
  current = new RowMetaAndData( fields );
  current.setData( new Object[ count ] );
}
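In this converter callback, setData() only pre-allocates an empty Object[] with one slot per expected field (count); the converter then fills the slots as individual values are materialized. A hedged sketch of that filling step, where index and convertedValue are placeholders rather than code from ParquetConverter:

// Hypothetical fill step: write a converted value into one slot of the pre-allocated row
current.getData()[ index ] = convertedValue;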
 
Example 3
Source File: CommonFormatShimTest.java    From pentaho-hadoop-shims with Apache License 2.0
@Test
public void testParquetWriteSuccessLocalFileSystem() throws Exception {
  final String PARQUET_FILE_NAME = "test.parquet";

  String tempFile = Files.createTempDirectory( "parquet" ).toUri().toString();

  ConfigurationProxy jobConfiguration = new ConfigurationProxy();
  jobConfiguration.set( FileOutputFormat.OUTDIR, tempFile );

  String parquetFilePath = jobConfiguration.get( FileOutputFormat.OUTDIR ) + PARQUET_FILE_NAME;

  IPentahoParquetOutputFormat pentahoParquetOutputFormat = null;
  switch ( provider ) {
    case "APACHE":
      pentahoParquetOutputFormat = new PentahoApacheOutputFormat();
      break;
    case "TWITTER":
      pentahoParquetOutputFormat = new PentahoTwitterOutputFormat();
      break;
    default:
      org.junit.Assert.fail( "Invalid provider name used." );
  }

  pentahoParquetOutputFormat.setOutputFile( parquetFilePath, true );

  pentahoParquetOutputFormat.setFields( ParquetUtils.createOutputFields( ParquetSpec.DataType.INT_64 ) );

  IPentahoRecordWriter recordWriter = pentahoParquetOutputFormat.createRecordWriter();
  RowMetaAndData rowInput = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  rowInput.setRowMeta( rowMeta );

  rowInput.setData( new Object[] { "Andrey", "11" } );

  recordWriter.write( rowInput );
  recordWriter.close();

  IPentahoRecordReader recordReader = readCreatedParquetFile( parquetFilePath );

  Object[] rowInputArr =
    new Object[] { rowInput.getData()[ 0 ].toString(), Long.parseLong( rowInput.getData()[ 1 ].toString() ) };

  recordReader.forEach(
    rowMetaAndData -> org.junit.Assert.assertArrayEquals( rowMetaAndData.getData(), rowInputArr ) );
}
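Note that although the RowMeta declares both "Name" and "Age" as ValueMetaString, the output fields passed to setFields() use ParquetSpec.DataType.INT_64, so the value read back for the numeric column comes out as a Long; that is why the expected array converts the original String with Long.parseLong before the assertArrayEquals comparison.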
 
Example 4
Source File: PentahoParquetRecordWriterTest.java    From pentaho-hadoop-shims with Apache License 2.0
@Test
public void recordWriterCreateFile() throws Exception {

  IPentahoOutputFormat.IPentahoRecordWriter writer = null;
  Object recordWriterObject = null;

  switch ( provider ) {
    case "APACHE":
      org.apache.parquet.hadoop.api.WriteSupport apacheSupport =
        new org.pentaho.hadoop.shim.common.format.parquet.delegate.apache.PentahoParquetWriteSupport(
          ParquetUtils.createOutputFields( ParquetSpec.DataType.INT_64 ) );
      org.apache.parquet.hadoop.ParquetOutputFormat apacheNativeParquetOutputFormat =
        new org.apache.parquet.hadoop.ParquetOutputFormat<>( apacheSupport );
      org.apache.parquet.hadoop.ParquetRecordWriter<RowMetaAndData> apacheRecordWriter =
        (org.apache.parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) apacheNativeParquetOutputFormat
          .getRecordWriter( task );
      recordWriterObject = apacheRecordWriter;
      writer = new org.pentaho.hadoop.shim.common.format.parquet.delegate.apache.PentahoParquetRecordWriter(
        apacheRecordWriter, task );
      break;
    case "TWITTER":
      parquet.hadoop.api.WriteSupport twitterSupport =
        new org.pentaho.hadoop.shim.common.format.parquet.delegate.twitter.PentahoParquetWriteSupport(
          ParquetUtils.createOutputFields( ParquetSpec.DataType.INT_64 ) );
      parquet.hadoop.ParquetOutputFormat twitterNativeParquetOutputFormat =
        new parquet.hadoop.ParquetOutputFormat<>( twitterSupport );
      parquet.hadoop.ParquetRecordWriter<RowMetaAndData> twitterRecordWriter =
        (parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) twitterNativeParquetOutputFormat.getRecordWriter( task );
      recordWriterObject = twitterRecordWriter;
      writer = new org.pentaho.hadoop.shim.common.format.parquet.delegate.twitter.PentahoParquetRecordWriter(
        twitterRecordWriter, task );
      break;
    default:
      org.junit.Assert.fail( "Invalid provider name used." );
  }

  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  row.setRowMeta( rowMeta );

  switch ( testType ) {
    case "DATA":
      row.setData( new Object[] { "Alex", "87" } );
      break;
    case "NULL":
      row.setData( new Object[] { null, null } );
      break;
    default:
      org.junit.Assert.fail( "Invalid test type used." );
  }

  writer.write( row );

  switch ( provider ) {
    case "APACHE":
      ( (org.apache.parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) recordWriterObject ).close( task );
      break;
    case "TWITTER":
      ( (parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) recordWriterObject ).close( task );
      break;
    default:
      org.junit.Assert.fail( "Invalid provider name used." );
  }

  Files.walk( Paths.get( tempFile.toString() ) )
    .filter( Files::isRegularFile )
    .forEach( ( f ) -> {
      String file = f.toString();
      if ( file.endsWith( "parquet" ) ) {
        try {
          switch ( testType ) {
            case "DATA":
              IPentahoInputFormat.IPentahoRecordReader recordReader =
                readCreatedParquetFile( Paths.get( file ).toUri().toString() );
              recordReader.forEach(
                rowMetaAndData -> Assert.assertTrue( rowMetaAndData.size() == 2 ) );
              break;
            case "NULL":
              Assert.assertTrue( Files.size( Paths.get( file ) ) > 0 );
              break;
            default:
              org.junit.Assert.fail( "Invalid test type used." );
          }
        } catch ( Exception e ) {
          e.printStackTrace();
        }
      }
    } );
}