org.apache.hadoop.io.Text Java Examples
The following examples show how to use
org.apache.hadoop.io.Text.
The original project and source file are noted above each example.
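Before the project examples, here is a minimal, self-contained sketch of the core Text API: construction, reuse via set(), the distinction between the logical UTF-8 length and the backing array, and a Writable serialization round trip. It is illustrative only and not taken from any of the projects below; the class name TextBasics is our own.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class TextBasics {
  public static void main(String[] args) throws IOException {
    // Text is a mutable, UTF-8 encoded Writable replacement for String
    Text t = new Text("hello");
    t.set("hi");  // reuse the same object; the backing buffer is retained

    // getLength() is the number of valid UTF-8 bytes;
    // getBytes() returns the backing array, which may be longer
    System.out.println(t.getLength());                          // 2
    System.out.println(t.getBytes().length >= t.getLength());   // true

    // Writable round trip: serialize, then deserialize into a fresh Text
    DataOutputBuffer out = new DataOutputBuffer();
    t.write(out);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    Text copy = new Text();
    copy.readFields(in);
    System.out.println(copy);                                   // hi
  }
}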
Example #1
Source File: TestDFSIO.java From big-c with Apache License 2.0
@Override // IOMapperBase
void collectStats(OutputCollector<Text, Text> output, String name,
                  long execTime, Long objSize) throws IOException {
  long totalSize = objSize.longValue();
  float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
  LOG.info("Number of bytes processed = " + totalSize);
  LOG.info("Exec time = " + execTime);
  LOG.info("IO rate = " + ioRateMbSec);

  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
      new Text(String.valueOf(1)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
      new Text(String.valueOf(totalSize)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
      new Text(String.valueOf(execTime)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
      new Text(String.valueOf(ioRateMbSec * 1000)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
      new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
}
Example #2
Source File: TestIndexedSort.java From big-c with Apache License 2.0
public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuilder sb = new StringBuilder();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
Example #3
Source File: YARNRunner.java From hadoop with Apache License 2.0
@VisibleForTesting
void addHistoryToken(Credentials ts) throws IOException, InterruptedException {
  /* check if we have a hsproxy, if not, no need */
  MRClientProtocol hsProxy = clientCache.getInitializedHSProxy();
  if (UserGroupInformation.isSecurityEnabled() && (hsProxy != null)) {
    /*
     * note that get delegation token was called. Again this is hack for oozie
     * to make sure we add history server delegation tokens to the credentials
     */
    RMDelegationTokenSelector tokenSelector = new RMDelegationTokenSelector();
    Text service = resMgrDelegate.getRMDelegationTokenService();
    if (tokenSelector.selectToken(service, ts.getAllTokens()) != null) {
      Text hsService = SecurityUtil.buildTokenService(hsProxy.getConnectAddress());
      if (ts.getToken(hsService) == null) {
        ts.addToken(hsService, getDelegationTokenFromHS(hsProxy));
      }
    }
  }
}
Example #4
Source File: PipeMapRed.java From RDFS with Apache License 2.0
/**
 * Split a line into key and value.
 * @param line a byte array containing the UTF-8 bytes of the line
 * @param length the number of valid bytes in {@code line}
 * @param key key of a record
 * @param val value of a record
 * @throws IOException
 */
void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException {
  int numKeyFields = getNumOfKeyFields();
  byte[] separator = getFieldSeparator();

  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator);
  }
  try {
    if (pos == -1) {
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length);
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}
Example #5
Source File: QueueInfo.java From hadoop with Apache License 2.0
@Override
public void write(DataOutput out) throws IOException {
  Text.writeString(out, queueName);
  WritableUtils.writeEnum(out, queueState);
  if (schedulingInfo != null) {
    Text.writeString(out, schedulingInfo);
  } else {
    Text.writeString(out, "N/A");
  }
  out.writeInt(stats.length);
  for (JobStatus stat : stats) {
    stat.write(out);
  }
  out.writeInt(children.size());
  for (QueueInfo childQueueInfo : children) {
    childQueueInfo.write(out);
  }
}
Example #6
Source File: BuildGlobalHiveDictPartPartitioner.java From kylin with Apache License 2.0
@Override
public int getPartition(Text key, NullWritable value, int numReduceTasks) {
  // the first byte of the key is the dictionary column index, starting from 0
  int colIndex = key.getBytes()[0];
  int colReduceNum = reduceNumArr[colIndex];
  int colReduceNumOffset = 0;
  for (int i = 0; i < colIndex; i++) {
    colReduceNumOffset += reduceNumArr[i];
  }

  // Calculate the reducer: reduce num = (value.hash % colReduceNum) + colReduceNumOffset
  byte[] keyBytes = Bytes.copy(key.getBytes(), 1, key.getLength() - 1);
  int hashCode = new Text(keyBytes).hashCode() & 0x7FFFFFFF;
  return hashCode % colReduceNum + colReduceNumOffset;
}
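To make the arithmetic concrete (the numbers below are hypothetical, not from the Kylin source): with reduceNumArr = {2, 3}, a key whose first byte is 0 is routed to reducer hash % 2 + 0 (reducer 0 or 1), while a key whose first byte is 1 is routed to reducer hash % 3 + 2 (reducer 2, 3, or 4). Each dictionary column therefore owns a contiguous, non-overlapping band of reducers.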
Example #7
Source File: ZipUnpackerSequenceFileWriter.java From localization_nifi with Apache License 2.0
@Override
protected void processInputStream(InputStream stream, final FlowFile flowFile,
    final Writer writer) throws IOException {
  try (final ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(stream))) {
    ZipEntry zipEntry;
    while ((zipEntry = zipIn.getNextEntry()) != null) {
      if (zipEntry.isDirectory()) {
        continue;
      }
      final File file = new File(zipEntry.getName());
      final String key = file.getName();
      long fileSize = zipEntry.getSize();
      final InputStreamWritable inStreamWritable = new InputStreamWritable(zipIn, (int) fileSize);
      writer.append(new Text(key), inStreamWritable);
      logger.debug("Appending FlowFile {} to Sequence File", new Object[]{key});
    }
  }
}
Example #8
Source File: StreamingRepartitionJoin.java From hiped2 with Apache License 2.0
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  UserLog userLog = UserLog.fromText(value);

  Tuple outputKey = new Tuple();
  outputKey.setString(KeyFields.USER, userLog.getName());
  outputKey.setInt(KeyFields.DATASET, USER_LOGS);

  Tuple outputValue = new Tuple();
  outputValue.setInt(ValueFields.DATASET, USER_LOGS);
  outputValue.setString(ValueFields.DATA, value.toString());

  context.write(outputKey, outputValue);
}
Example #9
Source File: SequenceFileLoader.java From spork with Apache License 2.0
protected Object translateWritableToPigDataType(Writable w, byte dataType) {
  switch (dataType) {
    case DataType.CHARARRAY: return ((Text) w).toString();
    case DataType.BYTEARRAY:
      BytesWritable bw = (BytesWritable) w;
      // Make a copy
      return new DataByteArray(bw.getBytes(), 0, bw.getLength());
    case DataType.BOOLEAN: return ((BooleanWritable) w).get();
    case DataType.INTEGER: return ((IntWritable) w).get();
    case DataType.LONG: return ((LongWritable) w).get();
    case DataType.FLOAT: return ((FloatWritable) w).get();
    case DataType.DOUBLE: return ((DoubleWritable) w).get();
    case DataType.BYTE: return ((ByteWritable) w).get();
    case DataType.DATETIME: return ((DateTimeWritable) w).get();
  }
  return null;
}
Example #10
Source File: Mapper2HbaseDemo.java From bigdata-tutorial with Apache License 2.0
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String arr[] = value.toString().split(",");
  if (arr.length == 2) {
    put = new Put(Bytes.toBytes(arr[0]));
    put.add(Bytes.toBytes("blog"), Bytes.toBytes("url"), Bytes.toBytes(arr[1]));
    if (!wal) {
      // skip the write-ahead log for throughput; must be set before the put is submitted
      put.setWriteToWAL(false);
    }
    htable.put(put);
    if ((++count % 100) == 0) {
      context.setStatus("Mapper has insert records=" + count);
      context.progress();
      LOG.info("Mapper has insert records=" + count);
    }
  }
}
Example #11
Source File: TestTokenCache.java From hadoop with Apache License 2.0
private MockFileSystem createFileSystemForServiceName(final String service)
    throws IOException {
  MockFileSystem mockFs = new MockFileSystem();
  when(mockFs.getCanonicalServiceName()).thenReturn(service);
  when(mockFs.getDelegationToken(any(String.class))).thenAnswer(
      new Answer<Token<?>>() {
        int unique = 0;
        @Override
        public Token<?> answer(InvocationOnMock invocation) throws Throwable {
          Token<?> token = new Token<TokenIdentifier>();
          token.setService(new Text(service));
          // use unique value so when we restore from token storage, we can
          // tell if it's really the same token
          token.setKind(new Text("token" + unique++));
          return token;
        }
      });
  return mockFs;
}
Example #12
Source File: TestChainMapReduce.java From RDFS with Apache License 2.0
public void reduce(LongWritable key, Iterator<Text> values,
    OutputCollector<LongWritable, Text> output, Reporter reporter)
    throws IOException {
  while (values.hasNext()) {
    Text value = values.next();
    writeFlag(conf, "reduce." + name + ".value." + value);
    key.set(10);
    output.collect(key, value);
    if (byValue) {
      assertEquals(10, key.get());
    } else {
      assertNotSame(10, key.get());
    }
    key.set(11);
  }
}
Example #13
Source File: DataToDoubleSummarySketchUDAFTest.java From incubator-datasketches-hive with Apache License 2.0
@Test
public void partial1ModeStringKeysExplicitParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] {
      stringInspector, doubleInspector, intInspector, floatInspector };
  GenericUDAFParameterInfo info =
      new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummarySketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    checkIntermediateResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new Text("a"), new DoubleWritable(1),
        new IntWritable(32), new FloatWritable(0.99f)});
    eval.iterate(state, new Object[] {new Text("b"), new DoubleWritable(1),
        new IntWritable(32), new FloatWritable(0.99f)});

    Object result = eval.terminatePartial(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof List);
    List<?> r = (List<?>) result;
    Assert.assertEquals(r.size(), 2);
    Assert.assertEquals(((IntWritable) r.get(0)).get(), 32);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) r.get(1)),
        new DoubleSummaryDeserializer());
    // because of sampling probability < 1
    Assert.assertTrue(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05);
  }
}
Example #14
Source File: TestExtParser.java From anthelion with Apache License 2.0
protected void setUp() throws ProtocolException, IOException {
  // prepare a temp file with expectedText as its content
  // This system property is defined in ./src/plugin/build-plugin.xml
  String path = System.getProperty("test.data");
  if (path != null) {
    File tempDir = new File(path);
    if (!tempDir.exists())
      tempDir.mkdir();
    tempFile = File.createTempFile("nutch.test.plugin.ExtParser.", ".txt", tempDir);
  } else {
    // otherwise in java.io.tmpdir
    tempFile = File.createTempFile("nutch.test.plugin.ExtParser.", ".txt");
  }
  urlString = tempFile.toURL().toString();

  FileOutputStream fos = new FileOutputStream(tempFile);
  fos.write(expectedText.getBytes());
  fos.close();

  // get nutch content
  Protocol protocol = new ProtocolFactory(NutchConfiguration.create()).getProtocol(urlString);
  content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
  protocol = null;
}
Example #15
Source File: ReduceSideJoin.java From hadoop-map-reduce-patterns with Apache License 2.0
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  // Clear our lists
  listA.clear();
  listB.clear();

  // iterate through all our values, binning each record based on what
  // it was tagged with. Make sure to remove the tag!
  while (values.iterator().hasNext()) {
    tmp = values.iterator().next();
    if (tmp.charAt(0) == 'A') {
      listA.add(new Text(tmp.toString().substring(1)));
    } else if (tmp.charAt(0) == 'B') {
      listB.add(new Text(tmp.toString().substring(1)));
    }
  }

  // Execute our join logic now that the lists are filled
  executeJoinLogic(context);
}
Example #16
Source File: DataJoinMapperBase.java From hadoop-gpu with Apache License 2.0
public void map(Object key, Object value, OutputCollector output,
    Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  addLongValue("totalCount", 1);
  TaggedMapOutput aRecord = generateTaggedMapOutput(value);
  if (aRecord == null) {
    addLongValue("discardedCount", 1);
    return;
  }
  Text groupKey = generateGroupKey(aRecord);
  if (groupKey == null) {
    addLongValue("nullGroupKeyCount", 1);
    return;
  }
  output.collect(groupKey, aRecord);
  addLongValue("collectedCount", 1);
}
Example #17
Source File: HDFSTransactionLogTest.java From phoenix-tephra with Apache License 2.0
private SequenceFile.Writer getSequenceFileWriter(Configuration configuration,
    FileSystem fs, long timeInMillis, byte versionNumber) throws IOException {
  String snapshotDir = configuration.get(TxConstants.Manager.CFG_TX_SNAPSHOT_DIR);
  Path newLog = new Path(snapshotDir, LOG_FILE_PREFIX + timeInMillis);
  SequenceFile.Metadata metadata = new SequenceFile.Metadata();
  if (versionNumber > 1) {
    metadata.set(new Text(TxConstants.TransactionLog.VERSION_KEY),
                 new Text(Byte.toString(versionNumber)));
  }

  switch (versionNumber) {
    case 1:
    case 2:
      return SequenceFile.createWriter(fs, configuration, newLog,
          LongWritable.class, co.cask.tephra.persist.TransactionEdit.class,
          SequenceFile.CompressionType.NONE, null, null, metadata);
    default:
      return SequenceFile.createWriter(fs, configuration, newLog,
          LongWritable.class, TransactionEdit.class,
          SequenceFile.CompressionType.NONE, null, null, metadata);
  }
}
Example #18
Source File: RDDConverterUtils.java From systemds with Apache License 2.0
@Override
public String call(Text v1) throws Exception {
  // parse input line
  String line = v1.toString();
  String[] cols = IOUtilFunctions.split(line, _delim);

  // determine number of non-zeros of row (w/o string parsing)
  int lnnz = IOUtilFunctions.countNnz(cols);

  // update counters
  _aNnz.add(lnnz);
  return line;
}
Example #19
Source File: HistoryClientService.java From hadoop with Apache License 2.0
@Override
public CancelDelegationTokenResponse cancelDelegationToken(
    CancelDelegationTokenRequest request) throws IOException {
  if (!isAllowedDelegationTokenOp()) {
    throw new IOException(
        "Delegation Token can be cancelled only with kerberos authentication");
  }

  org.apache.hadoop.yarn.api.records.Token protoToken = request.getDelegationToken();
  Token<MRDelegationTokenIdentifier> token =
      new Token<MRDelegationTokenIdentifier>(
          protoToken.getIdentifier().array(), protoToken.getPassword().array(),
          new Text(protoToken.getKind()), new Text(protoToken.getService()));

  String user = UserGroupInformation.getCurrentUser().getUserName();
  jhsDTSecretManager.cancelToken(token, user);
  return Records.newRecord(CancelDelegationTokenResponse.class);
}
Example #20
Source File: TestDFSIO.java From hadoop with Apache License 2.0
@Override // IOMapperBase
void collectStats(OutputCollector<Text, Text> output, String name,
                  long execTime, Long objSize) throws IOException {
  long totalSize = objSize.longValue();
  float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
  LOG.info("Number of bytes processed = " + totalSize);
  LOG.info("Exec time = " + execTime);
  LOG.info("IO rate = " + ioRateMbSec);

  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
      new Text(String.valueOf(1)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
      new Text(String.valueOf(totalSize)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
      new Text(String.valueOf(execTime)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
      new Text(String.valueOf(ioRateMbSec * 1000)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
      new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
}
Example #21
Source File: ShardedTableMapFileTest.java From datawave with Apache License 2.0
@Test
public void testSingleDaySplitsCreated_AndValid() throws Exception {
  String tableName = "validSplits";
  SortedMap<Text,String> splits = createDistributedLocations(tableName);
  createSplitsFile(splits, conf, splits.size(), tableName);
  Map<Text,String> locations = ShardedTableMapFile.getShardIdToLocations(conf, tableName);
  // three days of splits, all should be good, none of these should error
  ShardedTableMapFile.validateShardIdLocations(conf, tableName, 0, locations);
  ShardedTableMapFile.validateShardIdLocations(conf, tableName, 1, locations);
  ShardedTableMapFile.validateShardIdLocations(conf, tableName, 2, locations);
}
Example #22
Source File: FlowStatistics.java From MapReduce-Demo with MIT License
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  int upFlow = 0;
  int downFlow = 0;
  for (Text value : values) {
    String[] strs = value.toString().split("\t");
    upFlow += Integer.parseInt(strs[0]);
    downFlow += Integer.parseInt(strs[1]);
  }
  int sumFlow = upFlow + downFlow;
  context.write(key, new Text(upFlow + "\t" + downFlow + "\t" + sumFlow));
}
Example #23
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "multisheetheader.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "us");
  job.set("hadoopoffice.read.header.read", "true");
  job.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
  job.set("hadoopoffice.read.header.column.names.regex", "column");
  job.set("hadoopoffice.read.header.column.names.replace", "spalte");
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "stax");

  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  assertEquals("spalte1",
      ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
      "header column 1 correctly read");
  assertEquals("spalte2",
      ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
      "header column 2 correctly read");
  assertEquals("spalte3",
      ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
      "header column 3 correctly read");
}
Example #24
Source File: FileSplit.java From big-c with Apache License 2.0
@Override
public void readFields(DataInput in) throws IOException {
  file = new Path(Text.readString(in));
  start = in.readLong();
  length = in.readLong();
  hosts = null;
}
Example #25
Source File: TestStCentroid.java From spatial-framework-for-hadoop with Apache License 2.0
@Test
public void TestSimplePointCentroid() throws Exception {
  final ST_Centroid stCtr = new ST_Centroid();
  final ST_Point stPt = new ST_Point();
  BytesWritable bwGeom = stPt.evaluate(new Text("point (2 3)"));
  BytesWritable bwCentroid = stCtr.evaluate(bwGeom);
  validatePoint(new Point(2, 3), bwCentroid);
}
Example #26
Source File: GeoTempFlatMap.java From OSTMap with Apache License 2.0
@Override
public void flatMap(Tuple2<Key, Value> value, Collector<Tuple2<Text, Mutation>> out)
    throws Exception {
  GeoTemporalKey gtk = GeoTemporalKey.buildKey(value.f1.toString());
  if (gtk.rowBytes != null && gtk.columQualifier != null) {
    // create mutations for username and screen name
    Mutation m = new Mutation(gtk.rowBytes);
    m.put(value.f0.getRow().getBytes(), gtk.columQualifier, EMPTY_BYTES);
    out.collect(new Tuple2<>(new Text(outputTableName), m));
  }
}
Example #27
Source File: ByteUtil.java From fluo with Apache License 2.0
public static byte[] toByteArray(Text text) {
  byte[] bytes = text.getBytes();
  if (bytes.length != text.getLength()) {
    bytes = new byte[text.getLength()];
    System.arraycopy(text.getBytes(), 0, bytes, 0, bytes.length);
  }
  return bytes;
}
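The copy guard exists because Text.getBytes() exposes the internal backing array, which can be longer than the logical value once a Text instance has been reused. A minimal sketch of the pitfall (the values are illustrative):

Text t = new Text("hello");
t.set("hi");                              // backing array may keep its old capacity
// t.getLength() is 2; only the first 2 bytes of t.getBytes() are valid
byte[] exact = ByteUtil.toByteArray(t);   // exactly {'h', 'i'}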
Example #28
Source File: WeaklyConnectedComponentComputation.java From distributed-graph-analytics with Apache License 2.0
/**
 * Only called during the first superstep.
 * Across all edges, find the greatest vertex id and broadcast it to all neighbors.
 *
 * @param vertex The current vertex being operated on.
 */
private void broadcastGreatestNeighbor(Vertex<Text, Text, Text> vertex) {
  String maxId = vertex.getId().toString();
  for (Edge<Text, Text> edge : vertex.getEdges()) {
    if (maxId.compareTo(edge.getTargetVertexId().toString()) < 0) {
      maxId = edge.getTargetVertexId().toString();
    }
  }
  logger.debug("First Superstep for {}: Sending {} to all my edges.", vertex.getId(), maxId);
  broadcastUpdates(vertex, true, maxId);
}
Example #29
Source File: OzoneManagerProtocolClientSideTranslatorPB.java From hadoop-ozone with Apache License 2.0
/**
 * Get a valid delegation token.
 *
 * @param renewer the designated renewer for the token
 * @return Token<OzoneTokenIdentifier>
 * @throws OMException
 */
@Override
public Token<OzoneTokenIdentifier> getDelegationToken(Text renewer)
    throws OMException {
  GetDelegationTokenRequestProto req = GetDelegationTokenRequestProto
      .newBuilder()
      .setRenewer(renewer == null ? "" : renewer.toString())
      .build();

  OMRequest omRequest = createOMRequest(Type.GetDelegationToken)
      .setGetDelegationTokenRequest(req)
      .build();

  final GetDelegationTokenResponseProto resp;
  try {
    resp = handleError(submitRequest(omRequest)).getGetDelegationTokenResponse();
    return resp.getResponse().hasToken()
        ? OMPBHelper.convertToDelegationToken(resp.getResponse().getToken())
        : null;
  } catch (IOException e) {
    if (e instanceof OMException) {
      throw (OMException) e;
    }
    throw new OMException("Get delegation token failed.", e, TOKEN_ERROR_OTHER);
  }
}
Example #30
Source File: MySQLTextExportMapper.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Export the table to MySQL by using mysqlimport to write the data to the
 * database.
 *
 * Expects one delimited text record as the 'val'; ignores the key.
 */
@Override
public void map(LongWritable key, Text val, Context context)
    throws IOException, InterruptedException {
  writeRecord(val.toString(), this.recordEndStr);

  // We don't emit anything to the OutputCollector because we wrote
  // straight to mysql. Send a progress indicator to prevent a timeout.
  context.progress();
}