Java Code Examples for org.apache.parquet.example.data.simple.SimpleGroup#getFieldRepetitionCount()
The following examples show how to use
org.apache.parquet.example.data.simple.SimpleGroup#getFieldRepetitionCount().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetAsTextInputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 5 votes |
protected List<String> groupToStrings(SimpleGroup grp) { ArrayList<String> s = new ArrayList<>(); for (int n = 0; n < grp.getType().getFieldCount(); n ++) { Type field = grp.getType().getType(n); try { if (!field.isPrimitive()) s.addAll(groupToStrings((SimpleGroup) grp.getGroup(n, 0))); // array of groups not (yet) supported else if (field.getRepetition() == Type.Repetition.REPEATED) { boolean is_binary = field.asPrimitiveType().getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY; StringBuilder sb = new StringBuilder("["); ArrayList<String> arr = new ArrayList<>(); for (int i = 0; i < grp.getFieldRepetitionCount(n); i ++) arr.add(is_binary ? "\"" + grp.getValueToString(n, i) + "\"" : grp.getValueToString(n, i)); sb.append(Joiner.on(", ").join(arr)); sb.append("]"); s.add(sb.toString()); } else s.add(grp.getValueToString(n, 0)); } catch (RuntimeException e) { if(e.getMessage().startsWith("not found") && field.getRepetition() == Type.Repetition.OPTIONAL) s.add(""); else throw e; } } return s; }
Example 2
Source File: ParquetAsJsonInputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 4 votes |
private void groupToJson(JsonGenerator currentGenerator, SimpleGroup grp) throws IOException { GroupType gt = grp.getType(); currentGenerator.writeStartObject(); for(int i = 0; i < gt.getFieldCount(); i ++) { String field = gt.getFieldName(i); try { Type t = gt.getType(i); int repetition = 1; boolean repeated = false; if (t.getRepetition() == Type.Repetition.REPEATED) { repeated = true; repetition = grp.getFieldRepetitionCount(i); currentGenerator.writeArrayFieldStart(field); } else currentGenerator.writeFieldName(field); for(int j = 0; j < repetition; j ++) { if (t.isPrimitive()) { switch (t.asPrimitiveType().getPrimitiveTypeName()) { case BINARY: currentGenerator.writeString(grp.getString(i, j)); break; case INT32: currentGenerator.writeNumber(grp.getInteger(i, j)); break; case INT96: case INT64: // clumsy way - TODO - Subclass SimpleGroup or something like that currentGenerator.writeNumber(Long.parseLong(grp.getValueToString(i, j))); break; case DOUBLE: case FLOAT: currentGenerator.writeNumber(Double.parseDouble(grp.getValueToString(i, j))); break; case BOOLEAN: currentGenerator.writeBoolean(grp.getBoolean(i, j)); break; default: throw new RuntimeException("Can't handle type " + gt.getType(i)); } } else { groupToJson(currentGenerator, (SimpleGroup) grp.getGroup(i, j)); } } if (repeated) currentGenerator.writeEndArray(); } catch (Exception e) { if (e.getMessage().startsWith("not found") && gt.getType(i).getRepetition() == Type.Repetition.OPTIONAL) currentGenerator.writeNull(); else throw new RuntimeException(e); } } currentGenerator.writeEndObject(); }