Java Code Examples for org.apache.pig.impl.util.ObjectSerializer#deserialize()
The following examples show how to use
org.apache.pig.impl.util.ObjectSerializer#deserialize().
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
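Nearly all of these examples follow the same round trip: on the Pig front end an object is turned into a String with ObjectSerializer.serialize() and stashed in the UDFContext (or the job Configuration), and on the back end that String is read back and passed to ObjectSerializer.deserialize(), with the result cast to its original type and checked for null. The minimal sketch below illustrates that pattern; the class name RequiredFieldsRoundTrip and the property key "my.loader.requiredFields" are hypothetical, chosen only for illustration.

import java.io.IOException;
import java.util.Properties;

import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.UDFContext;

public class RequiredFieldsRoundTrip {

    // Hypothetical property key used only for this sketch.
    private static final String REQUIRED_FIELDS_KEY = "my.loader.requiredFields";

    // Front end: serialize the object to a String and stash it in the UDF context.
    public static void store(boolean[] requiredFields, String signature) throws IOException {
        Properties p = UDFContext.getUDFContext()
                .getUDFProperties(RequiredFieldsRoundTrip.class, new String[] { signature });
        p.setProperty(REQUIRED_FIELDS_KEY, ObjectSerializer.serialize(requiredFields));
    }

    // Back end: read the String back and deserialize it. As in the examples below,
    // the result may be null if the property was never set, so callers check for that.
    public static boolean[] load(String signature) throws IOException {
        Properties p = UDFContext.getUDFContext()
                .getUDFProperties(RequiredFieldsRoundTrip.class, new String[] { signature });
        return (boolean[]) ObjectSerializer.deserialize(p.getProperty(REQUIRED_FIELDS_KEY));
    }
}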
Example 1
Source File: FixedWidthLoader.java From spork with Apache License 2.0 | 6 votes |
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    // Save reader to use in getNext()
    this.reader = reader;

    splitIndex = split.getSplitIndex();

    // Get schema from front-end
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });

    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    requiredFields = (boolean[]) ObjectSerializer.deserialize(p.getProperty(REQUIRED_FIELDS_SIGNATURE));
    if (requiredFields != null) {
        numRequiredFields = 0;
        for (int i = 0; i < requiredFields.length; i++) {
            if (requiredFields[i])
                numRequiredFields++;
        }
    }
}
Example 2
Source File: HBaseStorage.java From spork with Apache License 2.0 | 6 votes |
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    if (location.startsWith("hbase://")) {
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location.substring(8));
    } else {
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location);
    }

    String serializedSchema = getUDFProperties().getProperty(contextSignature + "_schema");
    if (serializedSchema != null) {
        schema_ = (ResourceSchema) ObjectSerializer.deserialize(serializedSchema);
    }

    m_conf = initializeLocalJobConfig(job);
    // Not setting a udf property and getting the hbase delegation token
    // only once like in setLocation as setStoreLocation gets different Job
    // objects for each call and the last Job passed is the one that is
    // launched. So we end up getting multiple hbase delegation tokens.
    addHBaseDelegationToken(m_conf, job);
}
Example 3
Source File: PhoenixHBaseStorage.java From phoenix with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Parse the HBase table name and configure job
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    String prefix = "hbase://";
    if (location.startsWith(prefix)) {
        tableName = location.substring(prefix.length());
    }
    config = new PhoenixPigConfiguration(job.getConfiguration());
    config.configure(server, tableName, batchSize);

    String serializedSchema = getUDFProperties().getProperty(contextSignature + SCHEMA);
    if (serializedSchema != null) {
        schema = (ResourceSchema) ObjectSerializer.deserialize(serializedSchema);
    }
}
Example 4
Source File: PhoenixHBaseLoader.java From phoenix with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") @Override public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { this.reader = reader; final String resourceSchemaAsStr = getValueFromUDFContext(this.contextSignature,RESOURCE_SCHEMA_SIGNATURE); if (resourceSchemaAsStr == null) { throw new IOException("Could not find schema in UDF context"); } schema = (ResourceSchema)ObjectSerializer.deserialize(resourceSchemaAsStr); }
Example 5
Source File: TestPruneColumn.java From spork with Apache License 2.0 | 5 votes |
@Override
public Tuple getNext() throws IOException {
    if (aliases == null) {
        aliases = (String[]) ObjectSerializer.deserialize(
                UDFContext.getUDFContext().getUDFProperties(this.getClass()).getProperty(signature));
        Tuple t = TupleFactory.getInstance().newTuple();
        for (String s : aliases)
            t.append(s);
        return t;
    }
    return null;
}
Example 6
Source File: PigSchemaConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
public static RequiredFieldList deserializeRequiredFieldList(String requiredFieldString) {
    if (requiredFieldString == null) {
        return null;
    }
    try {
        return (RequiredFieldList) ObjectSerializer.deserialize(requiredFieldString);
    } catch (IOException e) {
        throw new RuntimeException("Failed to deserialize pushProjection", e);
    }
}
Example 7
Source File: PigBytesRawComparator.java From spork with Apache License 2.0 | 5 votes |
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
    ((BinInterSedes.BinInterSedesTupleRawComparator) mWrappedComp).setConf(conf);
}
Example 8
Source File: PigFloatRawComparator.java From spork with Apache License 2.0 | 5 votes |
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example 9
Source File: SchemaTuple.java From spork with Apache License 2.0 | 5 votes |
protected static Schema staticSchemaGen(String s) {
    try {
        if (s.equals("")) {
            Log.warn("No Schema present in SchemaTuple generated class");
            return new Schema();
        }
        return (Schema) ObjectSerializer.deserialize(s);
    } catch (IOException e) {
        throw new RuntimeException("Unable to deserialize serialized Schema: " + s, e);
    }
}
Example 10
Source File: OrcStorage.java From spork with Apache License 2.0 | 5 votes |
private TypeInfo getTypeInfo(String location, Job job) throws IOException {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    TypeInfo typeInfo = (TypeInfo) ObjectSerializer.deserialize(p.getProperty(signature + SchemaSignatureSuffix));
    if (typeInfo == null) {
        typeInfo = getTypeInfoFromLocation(location, job);
    }
    if (typeInfo != null) {
        p.setProperty(signature + SchemaSignatureSuffix, ObjectSerializer.serialize(typeInfo));
    }
    return typeInfo;
}
Example 11
Source File: OrcStorage.java From spork with Apache License 2.0 | 5 votes |
@Override
public void setLocation(String location, Job job) throws IOException {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    if (!UDFContext.getUDFContext().isFrontend()) {
        typeInfo = (TypeInfo) ObjectSerializer.deserialize(p.getProperty(signature + SchemaSignatureSuffix));
    } else if (typeInfo == null) {
        typeInfo = getTypeInfo(location, job);
    }
    if (typeInfo != null && oi == null) {
        oi = OrcStruct.createObjectInspector(typeInfo);
    }
    if (!UDFContext.getUDFContext().isFrontend()) {
        if (p.getProperty(signature + RequiredColumnsSuffix) != null) {
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(
                    p.getProperty(signature + RequiredColumnsSuffix));
            job.getConfiguration().setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
            job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
                    getReqiredColumnIdString(mRequiredColumns));
            if (p.getProperty(signature + SearchArgsSuffix) != null) {
                // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set
                job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
                        getReqiredColumnNamesString(getSchema(location, job), mRequiredColumns));
            }
        } else if (p.getProperty(signature + SearchArgsSuffix) != null) {
            // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set
            job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
                    getReqiredColumnNamesString(getSchema(location, job)));
        }
        if (p.getProperty(signature + SearchArgsSuffix) != null) {
            job.getConfiguration().set(SARG_PUSHDOWN, p.getProperty(signature + SearchArgsSuffix));
        }
    }
    FileInputFormat.setInputPaths(job, location);
}
Example 12
Source File: PigInputFormat.java From spork with Apache License 2.0 | 4 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public org.apache.hadoop.mapreduce.RecordReader<Text, Tuple> createRecordReader( org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // We need to create a TaskAttemptContext based on the Configuration which // was used in the getSplits() to produce the split supplied here. For // this, let's find out the input of the script which produced the split // supplied here and then get the corresponding Configuration and setup // TaskAttemptContext based on it and then call the real InputFormat's // createRecordReader() method PigSplit pigSplit = (PigSplit)split; activeSplit = pigSplit; // XXX hadoop 20 new API integration: get around a hadoop 20 bug by // passing total # of splits to each split so it can be retrieved // here and set it to the configuration object. This number is needed // by PoissonSampleLoader to compute the number of samples int n = pigSplit.getTotalSplits(); context.getConfiguration().setInt("pig.mapsplits.count", n); Configuration conf = context.getConfiguration(); PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer .deserialize(conf.get("udf.import.list"))); MapRedUtil.setupUDFContext(conf); LoadFunc loadFunc = getLoadFunc(pigSplit.getInputIndex(), conf); // Pass loader signature to LoadFunc and to InputFormat through // the conf passLoadSignature(loadFunc, pigSplit.getInputIndex(), conf); // merge entries from split specific conf into the conf we got PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf); // for backward compatibility PigInputFormat.sJob = conf; InputFormat inputFormat = loadFunc.getInputFormat(); List<Long> inpLimitLists = (ArrayList<Long>)ObjectSerializer.deserialize( conf.get("pig.inpLimits")); return new PigRecordReader(inputFormat, pigSplit, loadFunc, context, inpLimitLists.get(pigSplit.getInputIndex())); }
Example 13
Source File: PigGenericMapReduce.java From spork with Apache License 2.0 | 4 votes |
/**
 * Configures the Reduce plan, the POPackage operator
 * and the reporter thread
 */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    inIllustrator = inIllustrator(context);
    if (inIllustrator)
        pack = getPack(context);
    Configuration jConf = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
    context.getConfiguration().set(PigConstants.TASK_INDEX,
            Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    sJobContext = context;
    sJobConfInternal.set(context.getConfiguration());
    sJobConf = context.getConfiguration();
    try {
        PigContext.setPackageImportList(
                (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        // This attempts to fetch all of the generated code from the distributed cache, and resolve it
        SchemaTupleBackend.initialize(jConf, pigContext);

        if (rp == null)
            rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
        stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

        if (!inIllustrator)
            pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
        // To be removed
        if (rp.isEmpty())
            log.debug("Reduce Plan empty!");
        else {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            rp.explain(baos);
            log.debug(baos.toString());
        }
        pigReporter = new ProgressableReporter();
        if (!(rp.isEmpty())) {
            roots = rp.getRoots().toArray(new PhysicalOperator[1]);
            leaf = rp.getLeaves().get(0);
        }

        // Get the UDF specific context
        MapRedUtil.setupUDFContext(jConf);
    } catch (IOException ioe) {
        String msg = "Problem while configuring reduce plan.";
        throw new RuntimeException(msg, ioe);
    }

    log.info("Aliases being processed per job phase (AliasName[line,offset]): "
            + jConf.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
Example 14
Source File: AvroStorage.java From spork with Apache License 2.0 | 4 votes |
/**
 * Set input location and obtain input schema.
 */
@SuppressWarnings("unchecked")
@Override
public void setLocation(String location, Job job) throws IOException {
    if (inputAvroSchema != null) {
        return;
    }

    if (!UDFContext.getUDFContext().isFrontend()) {
        Properties udfProps = getUDFProperties();
        String mergedSchema = udfProps.getProperty(AVRO_MERGED_SCHEMA_PROPERTY);
        if (mergedSchema != null) {
            HashMap<URI, Map<Integer, Integer>> mergedSchemaMap =
                    (HashMap<URI, Map<Integer, Integer>>) ObjectSerializer.deserialize(mergedSchema);
            schemaToMergedSchemaMap = new HashMap<Path, Map<Integer, Integer>>();
            for (Entry<URI, Map<Integer, Integer>> entry : mergedSchemaMap.entrySet()) {
                schemaToMergedSchemaMap.put(new Path(entry.getKey()), entry.getValue());
            }
        }
        String schema = udfProps.getProperty(AVRO_INPUT_SCHEMA_PROPERTY);
        if (schema != null) {
            try {
                inputAvroSchema = new Schema.Parser().parse(schema);
                return;
            } catch (Exception e) {
                // Cases like testMultipleSchemas2 cause exception while deserializing
                // symbols. In that case, we get it again.
                LOG.warn("Exception while trying to deserialize schema in backend. "
                        + "Will construct again. schema= " + schema, e);
            }
        }
    }

    Configuration conf = job.getConfiguration();
    Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
    if (!paths.isEmpty()) {
        // Set top level directories in input format. Adding all files will
        // bloat configuration size
        FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
        // Scan all directories including sub directories for schema
        if (inputAvroSchema == null) {
            setInputAvroSchema(paths, conf);
        }
    } else {
        throw new IOException("Input path \'" + location + "\' is not found");
    }
}
Example 15
Source File: PigCombiner.java From spork with Apache License 2.0 | 4 votes |
/**
 * Configures the Reduce plan, the POPackage operator
 * and the reporter thread
 */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration jConf = context.getConfiguration();
    try {
        PigContext.setPackageImportList(
                (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));
        if (pigContext.getLog4jProperties() != null)
            PropertyConfigurator.configure(pigContext.getLog4jProperties());

        cp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.combinePlan"));
        pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.combine.package"));
        // To be removed
        if (cp.isEmpty())
            log.debug("Combine Plan empty!");
        else {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            cp.explain(baos);
            log.debug(baos.toString());
        }
        keyType = ((byte[]) ObjectSerializer.deserialize(jConf.get("pig.map.keytype")))[0];
        // till here

        pigReporter = new ProgressableReporter();
        if (!(cp.isEmpty())) {
            roots = cp.getRoots().toArray(new PhysicalOperator[1]);
            leaf = cp.getLeaves().get(0);
        }
    } catch (IOException ioe) {
        String msg = "Problem while configuring combiner's reduce plan.";
        throw new RuntimeException(msg, ioe);
    }

    // Avoid log spamming
    if (firstTime) {
        log.info("Aliases being processed per job phase (AliasName[line,offset]): "
                + jConf.get("pig.alias.location"));
        firstTime = false;
    }
}
Example 16
Source File: PigGenericMapBase.java From spork with Apache License 2.0 | 4 votes |
/**
 * Configures the mapper with the map plan and the
 * reporter thread
 */
@SuppressWarnings("unchecked")
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration job = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(job));
    context.getConfiguration().set(PigConstants.TASK_INDEX,
            Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    PigMapReduce.sJobContext = context;
    PigMapReduce.sJobConfInternal.set(context.getConfiguration());
    PigMapReduce.sJobConf = context.getConfiguration();
    inIllustrator = inIllustrator(context);

    PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(job.get("udf.import.list")));
    pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));

    // This attempts to fetch all of the generated code from the distributed cache, and resolve it
    SchemaTupleBackend.initialize(job, pigContext);

    if (pigContext.getLog4jProperties() != null)
        PropertyConfigurator.configure(pigContext.getLog4jProperties());

    if (mp == null)
        mp = (PhysicalPlan) ObjectSerializer.deserialize(job.get("pig.mapPlan"));
    stores = PlanHelper.getPhysicalOperators(mp, POStore.class);

    // To be removed
    if (mp.isEmpty())
        log.debug("Map Plan empty!");
    else {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        mp.explain(baos);
        log.debug(baos.toString());
    }
    keyType = ((byte[]) ObjectSerializer.deserialize(job.get("pig.map.keytype")))[0];
    // till here

    pigReporter = new ProgressableReporter();
    // Get the UDF specific context
    MapRedUtil.setupUDFContext(job);

    if (!(mp.isEmpty())) {
        PigSplit split = (PigSplit) context.getInputSplit();
        List<OperatorKey> targetOpKeys = split.getTargetOps();

        ArrayList<PhysicalOperator> targetOpsAsList = new ArrayList<PhysicalOperator>();
        for (OperatorKey targetKey : targetOpKeys) {
            targetOpsAsList.add(mp.getOperator(targetKey));
        }
        roots = targetOpsAsList.toArray(new PhysicalOperator[1]);
        leaf = mp.getLeaves().get(0);
    }

    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new MRTaskContext(context));

    log.info("Aliases being processed per job phase (AliasName[line,offset]): "
            + job.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
Example 17
Source File: AvroStorage.java From Cubert with Apache License 2.0 | 4 votes |
/**
 * Set input location and obtain input schema.
 */
@SuppressWarnings("unchecked")
@Override
public void setLocation(String location, Job job) throws IOException {
    if (inputAvroSchema != null) {
        return;
    }

    if (!UDFContext.getUDFContext().isFrontend()) {
        Properties udfProps = getUDFProperties();
        String mergedSchema = udfProps.getProperty(AVRO_MERGED_SCHEMA_PROPERTY);
        if (mergedSchema != null) {
            HashMap<URI, Map<Integer, Integer>> mergedSchemaMap =
                    (HashMap<URI, Map<Integer, Integer>>) ObjectSerializer.deserialize(mergedSchema);
            schemaToMergedSchemaMap = new HashMap<Path, Map<Integer, Integer>>();
            for (Entry<URI, Map<Integer, Integer>> entry : mergedSchemaMap.entrySet()) {
                schemaToMergedSchemaMap.put(new Path(entry.getKey()), entry.getValue());
            }
        }
        String schema = udfProps.getProperty(AVRO_INPUT_SCHEMA_PROPERTY);
        if (schema != null) {
            try {
                inputAvroSchema = new Schema.Parser().parse(schema);
                return;
            } catch (Exception e) {
                // Cases like testMultipleSchemas2 cause exception while deserializing
                // symbols. In that case, we get it again.
                LOG.warn("Exception while trying to deserialize schema in backend. "
                        + "Will construct again. schema= " + schema, e);
            }
        }
    }

    Configuration conf = job.getConfiguration();
    Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
    if (!paths.isEmpty()) {
        // Set top level directories in input format. Adding all files will
        // bloat configuration size
        FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
        // Scan all directories including sub directories for schema
        if (inputAvroSchema == null) {
            setInputAvroSchema(paths, conf);
        }
    } else {
        throw new IOException("Input path \'" + location + "\' is not found");
    }
}
Example 18
Source File: CSVLoader.java From spork with Apache License 2.0 | 4 votes |
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();

    boolean inField = false;
    boolean inQuotedField = false;
    boolean evenQuotesSeen = true;

    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    try {
        if (!in.nextKeyValue()) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int fieldID = 0;

        ByteBuffer fieldBuffer = ByteBuffer.allocate(len);

        for (int i = 0; i < len; i++) {
            byte b = buf[i];
            inField = true;
            if (inQuotedField) {
                if (b == DOUBLE_QUOTE) {
                    evenQuotesSeen = !evenQuotesSeen;
                    if (evenQuotesSeen) {
                        fieldBuffer.put(DOUBLE_QUOTE);
                    }
                } else if (!evenQuotesSeen && (b == FIELD_DEL || b == RECORD_DEL)) {
                    inQuotedField = false;
                    inField = false;
                    readField(fieldBuffer, fieldID++);
                } else {
                    fieldBuffer.put(b);
                }
            } else if (b == DOUBLE_QUOTE) {
                inQuotedField = true;
                evenQuotesSeen = true;
            } else if (b == FIELD_DEL) {
                inField = false;
                readField(fieldBuffer, fieldID++); // end of the field
            } else {
                evenQuotesSeen = true;
                fieldBuffer.put(b);
            }
        }
        if (inField)
            readField(fieldBuffer, fieldID++);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }

    Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
    return t;
}
Example 19
Source File: IcebergStorage.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") public <T extends Serializable> T getFromUDFContext(String key, Class<T> clazz) throws IOException { Properties properties = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{signature}); return (T) ObjectSerializer.deserialize(properties.getProperty(key)); }
Example 20
Source File: IcebergPigInputFormat.java From iceberg with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") private boolean advance() throws IOException { if (reader != null) { reader.close(); } if (!tasks.hasNext()) { return false; } FileScanTask currentTask = tasks.next(); Schema tableSchema = (Schema) ObjectSerializer.deserialize(context.getConfiguration().get(scope(ICEBERG_SCHEMA))); LOG.debug("[{}]: Task table schema: {}", signature, tableSchema); List<String> projectedFields = (List<String>) ObjectSerializer.deserialize(context.getConfiguration().get(scope(ICEBERG_PROJECTED_FIELDS))); LOG.debug("[{}]: Task projected fields: {}", signature, projectedFields); Schema projectedSchema = projectedFields != null ? SchemaUtil.project(tableSchema, projectedFields) : tableSchema; PartitionSpec spec = currentTask.asFileScanTask().spec(); DataFile file = currentTask.file(); InputFile inputFile = HadoopInputFile.fromLocation(file.path(), context.getConfiguration()); Set<Integer> idColumns = spec.identitySourceIds(); // schema needed for the projection and filtering boolean hasJoinedPartitionColumns = !idColumns.isEmpty(); switch (file.format()) { case PARQUET: Map<Integer, Object> partitionValueMap = Maps.newHashMap(); if (hasJoinedPartitionColumns) { Schema readSchema = TypeUtil.selectNot(projectedSchema, idColumns); Schema projectedPartitionSchema = TypeUtil.select(projectedSchema, idColumns); Map<String, Integer> partitionSpecFieldIndexMap = Maps.newHashMap(); for (int i = 0; i < spec.fields().size(); i++) { partitionSpecFieldIndexMap.put(spec.fields().get(i).name(), i); } for (Types.NestedField field : projectedPartitionSchema.columns()) { int partitionIndex = partitionSpecFieldIndexMap.get(field.name()); Object partitionValue = file.partition().get(partitionIndex, Object.class); partitionValueMap.put(field.fieldId(), convertPartitionValue(field.type(), partitionValue)); } reader = Parquet.read(inputFile) .project(readSchema) .split(currentTask.start(), currentTask.length()) .filter(currentTask.residual()) .createReaderFunc( fileSchema -> PigParquetReader.buildReader(fileSchema, projectedSchema, partitionValueMap)) .build(); } else { reader = Parquet.read(inputFile) .project(projectedSchema) .split(currentTask.start(), currentTask.length()) .filter(currentTask.residual()) .createReaderFunc( fileSchema -> PigParquetReader.buildReader(fileSchema, projectedSchema, partitionValueMap)) .build(); } recordIterator = reader.iterator(); break; default: throw new UnsupportedOperationException("Unsupported file format: " + file.format()); } return true; }