org.apache.hadoop.io.Writable#toString

Source File: PGBulkloadExportMapper.java From aliyun-maxcompute-data-collectors with Apache License 2.0

7 votes

/**
 * Writes the string form of one input record to {@code writer}.
 *
 * <p>A {@code Text} value is written followed by a line separator; a
 * {@code SqoopRecord} value is written as-is (presumably its string form
 * already carries its own record terminator; confirm against the record
 * class). Any other value type is ignored.
 *
 * <p>If anything fails, the temporary table named by {@code tmpTableName} is
 * dropped, {@code cleanup(context)} is invoked, and the failure is rethrown
 * wrapped in an {@link IOException}.
 *
 * @param key     the record key; not used here
 * @param value   the record to export
 * @param context the task context, passed to {@code cleanup} on failure
 * @throws IOException if writing the record fails for any reason
 */
public void map(LongWritable key, Writable value, Context context)
  throws IOException, InterruptedException {
  try {
    final String line = value.toString();
    final boolean isText = value instanceof Text;
    if (isText || value instanceof SqoopRecord) {
      writer.write(line, 0, line.length());
      // Only plain Text records get a line separator appended.
      if (isText) {
        writer.newLine();
      }
    }
  } catch (Exception e) {
    // Undo the partially loaded temporary table before reporting the failure.
    doExecuteUpdate("DROP TABLE " + tmpTableName);
    cleanup(context);
    throw new IOException(e);
  }
}

Source File: PatternMatcher.java From jumbune with GNU Lesser General Public License v3.0

6 votes

/**
 * Tests whether the string form of a value matches a regular expression in
 * its entirety.
 * 
 * @param value
 *            - The value to be matched; may be null
 * @param regex
 *            - Regular expression, compiled on every call
 * @return true if the whole string form of value matches regex; false if it
 *         does not, or if value is null or its string form is null or empty
 */
public static boolean match(Writable value, String regex) {
	final String text = (value == null) ? null : value.toString();
	if (text == null || text.isEmpty()) {
		// Nothing to match against; the regex is not even compiled in this case.
		return false;
	}
	// TODO: Remove Pattern.compile
	return Pattern.compile(regex).matcher(text).matches();
}

Source File: DynamoDBSerDe.java From emr-dynamodb-connector with Apache License 2.0

5 votes

/**
 * Validates that the incoming record is a {@code DynamoDBItemWritable} and
 * returns it unchanged.
 *
 * @param dataMap the record to deserialize
 * @return {@code dataMap} itself
 * @throws SerDeException if {@code dataMap} is null or is not a
 *         {@code DynamoDBItemWritable}
 */
@Override
public Object deserialize(Writable dataMap) throws SerDeException {
  // Report null through the declared exception type instead of letting
  // getClass() below fail with a NullPointerException.
  if (dataMap == null) {
    throw new SerDeException("Expected DynamoDBItemWritable data type, got null");
  }
  if (!(dataMap instanceof DynamoDBItemWritable)) {
    throw new SerDeException("Expected DynamoDBItemWritable data type, got "
        + dataMap.getClass().getName() + " data: " + dataMap.toString());
  }
  return dataMap;
}

Source File: PatternMatcher.java From jumbune with GNU Lesser General Public License v3.0

5 votes

/**
 * Tests whether the string form of a value matches a precompiled pattern in
 * its entirety.
 * 
 * @param value
 *            - The value to be matched; may be null
 * @param pattern
 *            - Pattern against which it should be matched
 * @return true if the whole string form of value matches pattern; false if it
 *         does not, or if value is null or its string form is null or empty
 */
public static boolean match(Writable value, Pattern pattern) {
	final String text = (value == null) ? null : value.toString();
	if (text == null || text.isEmpty()) {
		// Nothing to match against; the pattern is not consulted in this case.
		return false;
	}
	return pattern.matcher(text).matches();
}

Source File: UtilES.java From deep-spark with Apache License 2.0

5 votes

/**
 * Unwraps the value carried by a Hadoop {@code Writable}.
 *
 * <p>Supported types are {@code NullWritable} (returned as the
 * {@code NullWritable} singleton), {@code BooleanWritable}, {@code Text}
 * (returned as a {@code String}), {@code ByteWritable}, {@code IntWritable},
 * {@code LongWritable}, {@code BytesWritable} (returned as a {@code byte[]}),
 * {@code DoubleWritable} and {@code FloatWritable}. Primitive values are
 * returned boxed.
 *
 * @param writable the writable to unwrap
 * @return the wrapped value, or {@code null} if {@code writable} is null or of
 *         an unsupported type
 * @throws IllegalAccessException declared for callers; not raised by any branch here
 * @throws InstantiationException declared for callers; not raised by any branch here
 * @throws InvocationTargetException declared for callers; not raised by any branch here
 */
private static Object getObjectFromWritable(Writable writable)
        throws IllegalAccessException, InstantiationException, InvocationTargetException {
    Object object = null;

    if (writable instanceof NullWritable) {
        object = NullWritable.get();
    } else if (writable instanceof BooleanWritable) {
        object = ((BooleanWritable) writable).get();
    } else if (writable instanceof Text) {
        object = writable.toString();
    } else if (writable instanceof ByteWritable) {
        object = ((ByteWritable) writable).get();
    } else if (writable instanceof IntWritable) {
        object = ((IntWritable) writable).get();
    } else if (writable instanceof LongWritable) {
        object = ((LongWritable) writable).get();
    } else if (writable instanceof BytesWritable) {
        // getBytes() exposes the backing buffer, which may be larger than the
        // valid data; copy only the first getLength() bytes so callers never
        // see stale padding.
        final BytesWritable bytes = (BytesWritable) writable;
        object = java.util.Arrays.copyOf(bytes.getBytes(), bytes.getLength());
    } else if (writable instanceof DoubleWritable) {
        object = ((DoubleWritable) writable).get();
    } else if (writable instanceof FloatWritable) {
        object = ((FloatWritable) writable).get();
    }
    // Any other type (or null) is left unmapped and yields null.

    return object;
}

Source File: SolrSerde.java From hive-solr with MIT License

4 votes

/**
 * Converts a {@code MapWritable} record into the reusable {@code row} list,
 * producing exactly one entry per configured column, in column order.
 *
 * <p>For each column name the matching map entry is looked up. STRING columns
 * take the value's string form; LONG and INT columns are parsed with
 * {@link Long#parseLong(String)}. A column whose value is absent, is a
 * {@code NullWritable}, fails to parse, or has any other type is represented
 * by {@code null} so that later columns keep their positions.
 *
 * @param writable the record; must be a {@code MapWritable}
 * @return the shared {@code row} list, cleared and refilled on every call
 * @throws SerDeException declared by the interface; not raised directly here
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final MapWritable input = (MapWritable) writable;
    final Text key = new Text();
    row.clear();

    for (int i = 0; i < columnNames.size(); i++) {
        key.set(columnNames.get(i));
        final Writable value = input.get(key);

        Object obj = null;
        if (value != null && !NullWritable.get().equals(value)) {
            final TypeInfo typeInfo = columnTypes.get(i);
            if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
                final PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) typeInfo;
                switch (primitiveInfo.getPrimitiveCategory()) {
                    case STRING:
                        obj = value.toString();
                        break;
                    case LONG:
                    case INT:
                        try {
                            obj = Long.parseLong(value.toString());
                        } catch (Exception e) {
                            // Best effort: an unparsable number becomes a null cell.
                            e.printStackTrace();
                        }
                        break;
                    default:
                        // Other primitive categories are not converted.
                        break;
                }
            }
        }

        // Always add a cell, even when null, so the row stays aligned with
        // the column list.
        row.add(obj);
    }

    return row;
}

Source File: DynamoDBExportSerDe.java From emr-dynamodb-connector with Apache License 2.0

4 votes

/**
 * Parses one exported text record into a {@code DynamoDBItemWritable}.
 *
 * <p>The record is split into fields on {@code byteToString(1)}, and each
 * field is split into an attribute name and a serialized attribute value on
 * {@code byteToString(2)}. Each value is decoded with
 * {@code HiveDynamoDBItemType.deserializeAttributeValue}.
 *
 * @param inputData the record; must be a {@code Text} when non-null
 * @return the parsed item, or {@code null} if {@code inputData} is null, its
 *         text is null or empty, or splitting yields no fields
 * @throws SerDeException if {@code inputData} is not a {@code Text}, if a
 *         field is empty, or if a field does not split into exactly two parts
 */
@Override
public Object deserialize(Writable inputData) throws SerDeException {
  if (inputData == null) {
    return null;
  }
  if (!(inputData instanceof Text)) {
    throw new SerDeException(getClass().toString() + ": expects Text object!");
  }

  final String record = inputData.toString();
  if (Strings.isNullOrEmpty(record)) {
    return null;
  }

  final String[] attributes = record.split(byteToString(1));
  if (attributes.length == 0) {
    return null;
  }

  final Map<String, AttributeValue> item = Maps.newHashMap();
  final String nameValueSeparator = byteToString(2);

  for (String attribute : attributes) {
    if (Strings.isNullOrEmpty(attribute)) {
      throw new SerDeException("Empty fields in data: " + record);
    }

    final String[] nameAndValue = attribute.split(nameValueSeparator);
    if (nameAndValue.length != 2) {
      throw new SerDeException("Invalid record with map value: "
          + Arrays.asList(nameAndValue));
    }

    /* Deserialize the AttributeValue string */
    item.put(nameAndValue[0],
        HiveDynamoDBItemType.deserializeAttributeValue(nameAndValue[1]));
  }

  return new DynamoDBItemWritable(item);
}

Source File: MoreIndexingFilter.java From anthelion with Apache License 2.0

4 votes

/**
 * <p>
 * Adds the document's content type to the "type" field and, when the
 * "moreIndexingFilter.indexMimeTypeParts" setting is enabled (the default),
 * also adds each part returned by {@code getParts} to the same field, so
 * that searches can be confined by the full type or by one of its parts.
 * </p>
 * <p>
 * For example, if contentType is application/vnd.ms-powerpoint, search can be
 * done with one of the following qualifiers
 * type:application/vnd.ms-powerpoint type:application type:vnd.ms-powerpoint
 * all case insensitive. The query filter is implemented in
 * {@link TypeQueryFilter}.
 * </p>
 *
 * @param doc the document to add the "type" values to
 * @param data parse data, consulted for the content type when the datum's
 *          metadata does not carry one
 * @param url the page URL, used to guess the type when none was declared
 * @param datum crawl datum whose metadata is checked first for the content type
 * @return {@code doc}, unchanged if no mime type could be determined
 */
private NutchDocument addType(NutchDocument doc, ParseData data, String url,
    CrawlDatum datum) {
  // The crawl datum's metadata takes precedence over the parse metadata.
  final Writable declared = datum.getMetaData().get(
      new Text(Response.CONTENT_TYPE));
  final String declaredType = (declared != null)
      ? declared.toString()
      : data.getMeta(Response.CONTENT_TYPE);

  // With no declared content type, fall back to a guess based on the URL
  // alone (original note, Jerome Charron 20050415: using the document
  // content as well would be better).
  String resolvedType = (declaredType == null)
      ? MIME.getMimeType(url)
      : MIME.forName(MimeUtil.cleanMimeType(declaredType));

  // Checks if we solved the content-type.
  if (resolvedType == null) {
    return doc;
  }

  // Replace the mime type with its mapped value when mapping is enabled.
  if (mapMimes && mimeMap.containsKey(resolvedType)) {
    resolvedType = mimeMap.get(resolvedType);
  }

  doc.add("type", resolvedType);

  // Check if we need to split the content type in sub parts
  if (conf.getBoolean("moreIndexingFilter.indexMimeTypeParts", true)) {
    for (String part : getParts(resolvedType)) {
      doc.add("type", part);
    }
  }

  return doc;
}

Source File: MoreIndexingFilter.java From nutch-htmlunit with Apache License 2.0

4 votes

/**
 * <p>
 * Indexes the document's content type under the "type" field. Unless the
 * "moreIndexingFilter.indexMimeTypeParts" setting is switched off, every
 * part returned by {@code getParts} is indexed under "type" as well, so
 * that search results can be confined by the full type or by one of its parts.
 * </p>
 * <p>
 * For example, if contentType is application/vnd.ms-powerpoint, search can be
 * done with one of the following qualifiers
 * type:application/vnd.ms-powerpoint type:application type:vnd.ms-powerpoint
 * all case insensitive. The query filter is implemented in
 * {@link TypeQueryFilter}.
 * </p>
 *
 * @param doc the document receiving the "type" values
 * @param data parse data, used as the fallback source of the content type
 * @param url the page URL, passed to {@code tika.detect} when no content type
 *          was declared
 * @param datum crawl datum whose metadata is the first source of the content type
 * @return {@code doc}, untouched when no mime type could be determined
 */
private NutchDocument addType(NutchDocument doc, ParseData data, String url,
    CrawlDatum datum) {
  // Prefer the content type recorded on the crawl datum; otherwise use the
  // one from the parse metadata.
  String declaredType;
  final Writable fromDatum = datum.getMetaData().get(
      new Text(Response.CONTENT_TYPE));
  if (fromDatum == null) {
    declaredType = data.getMeta(Response.CONTENT_TYPE);
  } else {
    declaredType = fromDatum.toString();
  }

  String resolvedType;
  if (declaredType != null) {
    resolvedType = MIME.forName(MimeUtil.cleanMimeType(declaredType));
  } else {
    // No plugin supplied a content type: detect it from the URL only
    // (original note, Jerome Charron 20050415: the document content
    // should ideally be used too).
    resolvedType = tika.detect(url);
  }

  // Checks if we solved the content-type.
  if (resolvedType == null) {
    return doc;
  }

  // Swap in the mapped mime type when mapping is enabled and an entry exists.
  final boolean mapped = mapMimes && mimeMap.containsKey(resolvedType);
  if (mapped) {
    resolvedType = mimeMap.get(resolvedType);
  }

  doc.add("type", resolvedType);

  // Check if we need to split the content type in sub parts
  final boolean indexParts =
      conf.getBoolean("moreIndexingFilter.indexMimeTypeParts", true);
  if (indexParts) {
    final String[] parts = getParts(resolvedType);
    for (int i = 0; i < parts.length; i++) {
      doc.add("type", parts[i]);
    }
  }

  return doc;
}

Java Code Examples for org.apache.hadoop.io.Writable#toString()