org.apache.hadoop.io.ArrayWritable Java Examples
The following examples show how to use
org.apache.hadoop.io.ArrayWritable.
Each example is taken from an open-source project; the source file, project, and license are noted above it.
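Before looking at the project examples, a minimal sketch of the core API may help. The snippet below is a standalone illustration (not taken from any of the projects listed here): it constructs an ArrayWritable, serializes it with the standard Writable contract, and reads it back. It also shows the common pattern of subclassing ArrayWritable so that the value class is known when Hadoop instantiates the object reflectively, for example when it is used as a reducer input value.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ArrayWritableBasics {

  // ArrayWritable has no no-arg constructor; frameworks that create instances
  // reflectively need a subclass that fixes the value class.
  public static class TextArrayWritable extends ArrayWritable {
    public TextArrayWritable() {
      super(Text.class);
    }
  }

  public static void main(String[] args) throws IOException {
    // Wrap an array of Writables; the value class must match the element type.
    ArrayWritable original = new ArrayWritable(Text.class,
        new Writable[] { new Text("a"), new Text("b"), new Text("c") });

    // Serialize using the Writable contract.
    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);

    // Deserialize into an instance that already knows its value class.
    TextArrayWritable copy = new TextArrayWritable();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    copy.readFields(in);

    // get() returns the backing Writable[]; toStrings() converts each element via toString().
    System.out.println(Arrays.toString(copy.toStrings())); // [a, b, c]
  }
}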
Example #1
Source File: AbstractParquetMapInspector.java From parquet-mr with Apache License 2.0
@Override
public int getMapSize(final Object data) {
  if (data == null) {
    return -1;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] mapContainer = ((ArrayWritable) data).get();

    if (mapContainer == null || mapContainer.length == 0) {
      return -1;
    } else {
      return ((ArrayWritable) mapContainer[0]).get().length;
    }
  }

  if (data instanceof Map) {
    return ((Map) data).size();
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
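The inspector above assumes the Parquet-Hive map layout: the outer ArrayWritable holds a single element, which is itself an ArrayWritable whose elements are two-element [key, value] ArrayWritables (Example #24 below constructs exactly this shape in a test). The following sketch builds that layout, using IntWritable keys and values purely for illustration:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class ParquetHiveMapLayout {
  public static void main(String[] args) {
    // Each map entry is a two-element [key, value] ArrayWritable.
    ArrayWritable entry1 = new ArrayWritable(Writable.class,
        new Writable[] { new IntWritable(0), new IntWritable(1) });
    ArrayWritable entry2 = new ArrayWritable(Writable.class,
        new Writable[] { new IntWritable(2), new IntWritable(3) });

    // The entries are wrapped in an inner ArrayWritable ...
    ArrayWritable entries = new ArrayWritable(ArrayWritable.class, new Writable[] { entry1, entry2 });

    // ... and the map itself is an ArrayWritable whose single element is that inner array.
    ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[] { entries });

    // getMapSize(map) in Example #1 would report the number of entries:
    System.out.println(((ArrayWritable) map.get()[0]).get().length); // 2
  }
}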
Example #2
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  conf.set("hadoopoffice.read.lowFootprint", "true");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
Example #3
Source File: FSEditLogOp.java From hadoop with Apache License 2.0
@Override
public void writeFields(DataOutputStream out) throws IOException {
  FSImageSerialization.writeLong(inodeId, out);
  FSImageSerialization.writeString(path, out);
  FSImageSerialization.writeShort(replication, out);
  FSImageSerialization.writeLong(mtime, out);
  FSImageSerialization.writeLong(atime, out);
  FSImageSerialization.writeLong(blockSize, out);
  new ArrayWritable(Block.class, blocks).write(out);
  permissions.write(out);

  if (this.opCode == OP_ADD) {
    AclEditLogUtil.write(aclEntries, out);
    XAttrEditLogProto.Builder b = XAttrEditLogProto.newBuilder();
    b.addAllXAttrs(PBHelper.convertXAttrProto(xAttrs));
    b.build().writeDelimitedTo(out);
    FSImageSerialization.writeString(clientName, out);
    FSImageSerialization.writeString(clientMachine, out);
    FSImageSerialization.writeBoolean(overwrite, out);
    FSImageSerialization.writeByte(storagePolicyId, out);
    // write clientId and callId
    writeRpcIds(rpcClientId, rpcCallId, out);
  }
}
Example #4
Source File: RealtimeUnmergedRecordReader.java From hudi with Apache License 2.0
/**
 * Construct an unmerged record reader that consumes both parquet and log records in parallel
 * and buffers them for upstream clients to consume.
 *
 * @param split File split
 * @param job Job Configuration
 * @param realReader Parquet Reader
 */
public RealtimeUnmergedRecordReader(HoodieRealtimeFileSplit split, JobConf job,
    RecordReader<NullWritable, ArrayWritable> realReader) {
  super(split, job);
  this.parquetReader = new SafeParquetRecordReaderWrapper(realReader);
  // Iterator for consuming records from parquet file
  this.parquetRecordsIterator = new RecordReaderValueIterator<>(this.parquetReader);
  this.executor = new BoundedInMemoryExecutor<>(getMaxCompactionMemoryInBytes(), getParallelProducers(),
      Option.empty(), x -> x, new DefaultSizeEstimator<>());
  // Consumer of this record reader
  this.iterator = this.executor.getQueue().iterator();
  this.logRecordScanner = new HoodieUnMergedLogRecordScanner(FSUtils.getFs(split.getPath().toString(), jobConf),
      split.getBasePath(), split.getDeltaLogPaths(), getReaderSchema(), split.getMaxCommitTime(),
      Boolean.parseBoolean(jobConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP,
          HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
      false,
      jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP,
          HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE),
      record -> {
        // convert Hoodie log record to Hadoop AvroWritable and buffer
        GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema()).get();
        ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(rec, getHiveSchema());
        this.executor.getQueue().insertRecord(aWritable);
      });
  // Start reading and buffering
  this.executor.startProducers();
}
Example #5
Source File: DiscoveryLogic.java From datawave with Apache License 2.0
/**
 * Takes in a batch scanner and returns an iterator over the DiscoveredThing objects contained in the value.
 *
 * @param scanner
 * @return
 */
public static Iterator<DiscoveredThing> transformScanner(final BatchScanner scanner) {
  return concat(transform(scanner.iterator(), new Function<Entry<Key,Value>,Iterator<DiscoveredThing>>() {
    DataInputBuffer in = new DataInputBuffer();

    @Override
    public Iterator<DiscoveredThing> apply(Entry<Key,Value> from) {
      Value value = from.getValue();
      in.reset(value.get(), value.getSize());
      ArrayWritable aw = new ArrayWritable(DiscoveredThing.class);
      try {
        aw.readFields(in);
      } catch (IOException e) {
        log.error(e);
        return null;
      }
      ArrayList<DiscoveredThing> thangs = Lists.newArrayListWithCapacity(aw.get().length);
      for (Writable w : aw.get()) {
        thangs.add((DiscoveredThing) w);
      }
      return thangs.iterator();
    }
  }));
}
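Example #5 only shows the read side: deserializing an ArrayWritable from the raw bytes of a scanner value. For reference, here is a sketch of the matching write side, under the assumption that the stored bytes are simply the output of ArrayWritable.write(); it uses plain Text elements instead of Datawave's DiscoveredThing, and the helper name serialize is hypothetical.

import java.io.IOException;

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ArrayWritableRoundTrip {

  // Hypothetical helper: serialize an ArrayWritable into the raw bytes that would later
  // be read back with readFields(), as in Example #5.
  static byte[] serialize(ArrayWritable aw) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    aw.write(out);
    // getData() may be larger than the written content, so copy only getLength() bytes.
    byte[] bytes = new byte[out.getLength()];
    System.arraycopy(out.getData(), 0, bytes, 0, out.getLength());
    return bytes;
  }

  public static void main(String[] args) throws IOException {
    ArrayWritable written = new ArrayWritable(Text.class,
        new Writable[] { new Text("thing-1"), new Text("thing-2") });
    byte[] stored = serialize(written);

    // Read side, mirroring Example #5: reset a DataInputBuffer on the stored bytes and readFields().
    ArrayWritable read = new ArrayWritable(Text.class);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(stored, stored.length);
    read.readFields(in);
    for (Writable w : read.get()) {
      System.out.println(w);
    }
  }
}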
Example #6
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  conf.set("hadoopoffice.read.lowFootprint", "true");
  conf.set("hadoopoffice.read.lowFootprint.parser", "stax");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
Example #7
Source File: StandardParquetHiveMapInspector.java From parquet-mr with Apache License 2.0
@Override
public Object getMapValueElement(final Object data, final Object key) {
  if (data == null || key == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] mapContainer = ((ArrayWritable) data).get();

    if (mapContainer == null || mapContainer.length == 0) {
      return null;
    }

    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();

    for (final Writable obj : mapArray) {
      final ArrayWritable mapObj = (ArrayWritable) obj;
      final Writable[] arr = mapObj.get();
      if (key.equals(arr[0])) {
        return arr[1];
      }
    }

    return null;
  }

  if (data instanceof Map) {
    return ((Map) data).get(key);
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Example #8
Source File: ArrayWritableObjectInspector.java From indexr with Apache License 2.0
@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final ArrayWritable arr = (ArrayWritable) data;
    return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
  }

  // since setStructFieldData and create return a list, getStructFieldData should be able to
  // handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
  // is something else.
  if (data instanceof List) {
    return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Example #9
Source File: QueryUtils.java From incubator-retired-pirk with Apache License 2.0
/**
 * Pulls the correct selector from the MapWritable data element given the queryType
 * <p>
 * Pulls first element of array if element is an array type
 */
public static String getSelectorByQueryType(MapWritable dataMap, QuerySchema qSchema, DataSchema dSchema) {
  String selector;

  String fieldName = qSchema.getSelectorName();
  if (dSchema.isArrayElement(fieldName)) {
    if (dataMap.get(dSchema.getTextName(fieldName)) instanceof WritableArrayWritable) {
      String[] selectorArray = ((WritableArrayWritable) dataMap.get(dSchema.getTextName(fieldName))).toStrings();
      selector = selectorArray[0];
    } else {
      String[] elementArray = ((ArrayWritable) dataMap.get(dSchema.getTextName(fieldName))).toStrings();
      selector = elementArray[0];
    }
  } else {
    selector = dataMap.get(dSchema.getTextName(fieldName)).toString();
  }

  return selector;
}
Example #10
Source File: FSEditLogOp.java From big-c with Apache License 2.0
@Override
public void writeFields(DataOutputStream out) throws IOException {
  FSImageSerialization.writeLong(inodeId, out);
  FSImageSerialization.writeString(path, out);
  FSImageSerialization.writeShort(replication, out);
  FSImageSerialization.writeLong(mtime, out);
  FSImageSerialization.writeLong(atime, out);
  FSImageSerialization.writeLong(blockSize, out);
  new ArrayWritable(Block.class, blocks).write(out);
  permissions.write(out);

  if (this.opCode == OP_ADD) {
    AclEditLogUtil.write(aclEntries, out);
    XAttrEditLogProto.Builder b = XAttrEditLogProto.newBuilder();
    b.addAllXAttrs(PBHelper.convertXAttrProto(xAttrs));
    b.build().writeDelimitedTo(out);
    FSImageSerialization.writeString(clientName, out);
    FSImageSerialization.writeString(clientMachine, out);
    FSImageSerialization.writeBoolean(overwrite, out);
    FSImageSerialization.writeByte(storagePolicyId, out);
    // write clientId and callId
    writeRpcIds(rpcClientId, rpcCallId, out);
  }
}
Example #11
Source File: HoodieParquetInputFormat.java From hudi with Apache License 2.0
@Override
public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSplit split, final JobConf job,
    final Reporter reporter) throws IOException {
  // TODO enable automatic predicate pushdown after fixing issues
  // FileSplit fileSplit = (FileSplit) split;
  // HoodieTableMetadata metadata = getTableMetadata(fileSplit.getPath().getParent());
  // String tableName = metadata.getTableName();
  // String mode = HoodieHiveUtil.readMode(job, tableName);

  // if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) {
  // FilterPredicate predicate = constructHoodiePredicate(job, tableName, split);
  // LOG.info("Setting parquet predicate push down as " + predicate);
  // ParquetInputFormat.setFilterPredicate(job, predicate);
  // clearOutExistingPredicate(job);
  // }
  return super.getRecordReader(split, job, reporter);
}
Example #12
Source File: FSEditLogOp.java From RDFS with Apache License 2.0
@Override
void writeFields(DataOutputStream out) throws IOException {
  out.writeInt(5);
  FSImageSerialization.writeString(path, out);
  FSImageSerialization.writeShortAsString(replication, out);
  FSImageSerialization.writeLongAsString(mtime, out);
  FSImageSerialization.writeLongAsString(atime, out);
  FSImageSerialization.writeLongAsString(blockSize, out);
  new ArrayWritable(Block.class, blocks).write(out);
  permissions.write(out);

  if (this.opCode == OP_ADD) {
    FSImageSerialization.writeString(clientName, out);
    FSImageSerialization.writeString(clientMachine, out);
  }
}
Example #13
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "stax");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password", "test2");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNull(reader, "Null record reader implies invalid password");
}
Example #14
Source File: AbstractSpreadSheetDocumentRecordWriter.java From hadoopoffice with Apache License 2.0
/**
 * Write a SpreadSheetDAO into a table document. Note that this does not necessarily mean it has already been
 * written to the OutputStream; usually it only updates the in-memory representation.
 *
 * @param key is ignored
 * @param value is a SpreadSheet Cell to be inserted into the table document
 */
@Override
public synchronized void write(NullWritable key, K value) throws IOException {
  try {
    if (value == null) {
      return;
    }
    if (value instanceof ArrayWritable) {
      ArrayWritable row = (ArrayWritable) value;
      Writable[] rowCellDAO = row.get();
      for (int i = 0; i < rowCellDAO.length; i++) {
        this.officeWriter.write(rowCellDAO[i]);
      }
    } else {
      this.officeWriter.write(value);
    }
  } catch (OfficeWriterException e) {
    LOG.error(e);
  }
}
Example #15
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password", "test2");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNull(reader, "Null record reader implies invalid password");
}
Example #16
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "sax");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password", "test2");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNull(reader, "Null record reader implies invalid password");
}
Example #17
Source File: TestHoodieParquetInputFormat.java From hudi with Apache License 2.0
private void ensureRecordsInCommit(String msg, String commit, int expectedNumberOfRecordsInCommit,
    int totalExpected) throws IOException {
  int actualCount = 0;
  int totalCount = 0;
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
  for (InputSplit split : splits) {
    RecordReader<NullWritable, ArrayWritable> recordReader = inputFormat.getRecordReader(split, jobConf, null);
    NullWritable key = recordReader.createKey();
    ArrayWritable writable = recordReader.createValue();

    while (recordReader.next(key, writable)) {
      // writable returns an array with [field1, field2, _hoodie_commit_time, _hoodie_commit_seqno]
      // Take the commit time and compare with the one we are interested in
      if (commit.equals((writable.get()[2]).toString())) {
        actualCount++;
      }
      totalCount++;
    }
  }
  assertEquals(expectedNumberOfRecordsInCommit, actualCount, msg);
  assertEquals(totalExpected, totalCount, msg);
}
Example #18
Source File: LinkedMapWritable.java From elasticsearch-hadoop with Apache License 2.0
@Override
public String toString() {
  Iterator<Entry<Writable, Writable>> i = entrySet().iterator();
  if (!i.hasNext()) {
    return "{}";
  }

  StringBuilder sb = new StringBuilder();
  sb.append('{');
  for (;;) {
    Entry<Writable, Writable> e = i.next();
    Writable key = e.getKey();
    Writable value = e.getValue();
    sb.append(key == this ? "(this Map)" : key);
    sb.append('=');
    if (value instanceof ArrayWritable) {
      sb.append(Arrays.toString(((ArrayWritable) value).get()));
    } else {
      sb.append(value == this ? "(this Map)" : value);
    }
    if (!i.hasNext()) {
      return sb.append('}').toString();
    }
    sb.append(", ");
  }
}
Example #19
Source File: ArrayWritableObjectInspector.java From parquet-mr with Apache License 2.0
@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final ArrayWritable arr = (ArrayWritable) data;
    return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
  }

  // since setStructFieldData and create return a list, getStructFieldData should be able to
  // handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
  // is something else.
  if (data instanceof List) {
    return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Example #20
Source File: DataWritableGroupConverter.java From parquet-mr with Apache License 2.0
public final ArrayWritable getCurrentArray() {
  final Writable[] writableArr;
  if (this.rootMap != null) {
    // We're at the root: we can safely re-use the same map to save perf
    writableArr = this.rootMap;
  } else {
    writableArr = new Writable[currentArr.length];
  }

  for (int i = 0; i < currentArr.length; i++) {
    final Object obj = currentArr[i];
    if (obj instanceof List) {
      final List<?> objList = (List<?>) obj;
      final ArrayWritable arr = new ArrayWritable(Writable.class, objList.toArray(new Writable[objList.size()]));
      writableArr[i] = arr;
    } else {
      writableArr[i] = (Writable) obj;
    }
  }
  return new ArrayWritable(Writable.class, writableArr);
}
Example #21
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "multisheetheader.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "us");
  conf.set("hadoopoffice.read.header.read", "true");
  conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
  conf.set("hadoopoffice.read.header.column.names.regex", "column");
  conf.set("hadoopoffice.read.header.column.names.replace", "spalte");
  conf.set("hadoopoffice.read.lowFootprint", "true");
  conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
      "header column 1 correctly read");
  assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
      "header column 2 correctly read");
  assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
      "header column 3 correctly read");
}
Example #22
Source File: ParquetHiveArrayInspector.java From parquet-mr with Apache License 2.0
@Override
public List<?> getList(final Object data) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] listContainer = ((ArrayWritable) data).get();

    if (listContainer == null || listContainer.length == 0) {
      return null;
    }

    final Writable subObj = listContainer[0];

    if (subObj == null) {
      return null;
    }

    final Writable[] array = ((ArrayWritable) subObj).get();
    final List<Writable> list = new ArrayList<Writable>();

    for (final Writable obj : array) {
      list.add(obj);
    }

    return list;
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Example #23
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedPositiveLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "sax");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password", "test");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  Text spreadSheetKey = new Text();
  ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
  assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 1");
  assertEquals("[excel2013encrypt.xlsx]Sheet1!A1", spreadSheetKey.toString(),
      "Input Split for Excel file has keyname == \"[excel2013encrypt.xlsx]Sheet1!A1\"");
  assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
  assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
      "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
      "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
Example #24
Source File: TestStandardParquetHiveMapInspector.java From parquet-mr with Apache License 2.0
@Test
public void testRegularMap() {
  final Writable[] entry1 = new Writable[] { new IntWritable(0), new IntWritable(1) };
  final Writable[] entry2 = new Writable[] { new IntWritable(2), new IntWritable(3) };

  final ArrayWritable internalMap = new ArrayWritable(ArrayWritable.class, new Writable[] {
      new ArrayWritable(Writable.class, entry1), new ArrayWritable(Writable.class, entry2) });

  final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[] { internalMap });

  assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0)));
  assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2)));
  assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 0)));
  assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 2)));
}
Example #25
Source File: ArrayWritableObjectInspector.java From parquet-mr with Apache License 2.0
@Override
public List<Object> getStructFieldsDataAsList(final Object data) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final ArrayWritable arr = (ArrayWritable) data;
    final Object[] arrWritable = arr.get();
    return new ArrayList<Object>(Arrays.asList(arrWritable));
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Example #26
Source File: ParquetRecordReaderWrapper.java From parquet-mr with Apache License 2.0
public ParquetRecordReaderWrapper(
    final ParquetInputFormat<ArrayWritable> newInputFormat,
    final InputSplit oldSplit,
    final JobConf oldJobConf,
    final Reporter reporter)
    throws IOException, InterruptedException {
  this(newInputFormat, oldSplit, oldJobConf, reporter, (new HiveBindingFactory()).create());
}
Example #27
Source File: TestParquetHiveArrayInspector.java From parquet-mr with Apache License 2.0
@Test
public void testNullContainer() {
  final ArrayWritable list = new ArrayWritable(ArrayWritable.class, null);
  assertEquals("Wrong size", -1, inspector.getListLength(list));
  assertNull("Should be null", inspector.getList(list));
  assertNull("Should be null", inspector.getListElement(list, 0));
}
Example #28
Source File: AbstractParquetMapInspector.java From parquet-mr with Apache License 2.0
@Override
public Map<?, ?> getMap(final Object data) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] mapContainer = ((ArrayWritable) data).get();

    if (mapContainer == null || mapContainer.length == 0) {
      return null;
    }

    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    final Map<Writable, Writable> map = new HashMap<Writable, Writable>();

    for (final Writable obj : mapArray) {
      final ArrayWritable mapObj = (ArrayWritable) obj;
      final Writable[] arr = mapObj.get();
      map.put(arr[0], arr[1]);
    }

    return map;
  }

  if (data instanceof Map) {
    return (Map) data;
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
Example #29
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "multisheetheader.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "us");
  conf.set("hadoopoffice.read.header.read", "true");
  conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
  conf.set("hadoopoffice.read.header.column.names.regex", "column");
  conf.set("hadoopoffice.read.header.column.names.replace", "spalte");
  conf.set("hadoopoffice.read.lowFootprint", "true");
  conf.set("hadoopoffice.read.lowFootprint.parser", "stax");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
      "header column 1 correctly read");
  assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
      "header column 2 correctly read");
  assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
      "header column 3 correctly read");
}
Example #30
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "multisheetheader.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "us");
  job.set("hadoopoffice.read.header.read", "true");
  job.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
  job.set("hadoopoffice.read.header.column.names.regex", "column");
  job.set("hadoopoffice.read.header.column.names.replace", "spalte");
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "stax");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
      "header column 1 correctly read");
  assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
      "header column 2 correctly read");
  assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
      "header column 3 correctly read");
}