Java Code Examples for org.apache.pig.newplan.logical.relational.LogicalSchema#size()
The following examples show how to use
org.apache.pig.newplan.logical.relational.LogicalSchema#size().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ColumnPruneHelper.java From spork with Apache License 2.0 | 6 votes |
@Override public void visit(LOStore store) throws FrontendException { Set<Long> output = setOutputUids(store); if (output.isEmpty()) { // to deal with load-store-load-store case LogicalSchema s = store.getSchema(); if (s == null) { throw new SchemaNotDefinedException("Schema for " + store.getName() + " is not defined."); } for(int i=0; i<s.size(); i++) { output.add(s.getField(i).uid); } } // for store, input uids are same as output uids store.annotate(INPUTUIDS, output); }
Example 2
Source File: AugmentBaseDataVisitor.java From spork with Apache License 2.0 | 6 votes |
/**
 * Tells whether {@code input} already contains a tuple whose first
 * {@code schema.size()} fields all equal the corresponding fields of
 * {@code newTuple}.
 *
 * Fields are compared null-safely: a null field matches only another null
 * field, instead of raising a NullPointerException.
 *
 * @param newTuple the tuple to look for
 * @param input    the bag to search
 * @param schema   supplies the number of leading fields to compare
 * @return true if a matching tuple exists in {@code input}
 * @throws ExecException if a field cannot be read from a tuple
 */
private boolean inInput(Tuple newTuple, DataBag input, LogicalSchema schema) throws ExecException {
    for (Iterator<Tuple> iter = input.iterator(); iter.hasNext();) {
        Tuple candidate = iter.next();
        boolean matches = true;
        for (int i = 0; i < schema.size(); ++i) {
            Object wanted = newTuple.get(i);
            Object present = candidate.get(i);
            // null-safe equality: calling equals() on a null field would throw
            boolean same = (wanted == null) ? (present == null) : wanted.equals(present);
            if (!same) {
                matches = false;
                break;
            }
        }
        if (matches) {
            return true;
        }
    }
    return false;
}
Example 3
Source File: DereferenceExpression.java From spork with Apache License 2.0 | 6 votes |
/**
 * Converts a list of raw column references into column positions.
 *
 * An {@code Integer} reference is taken as a position and, when a schema is
 * available, range-checked against it. Any other reference is cast to
 * {@code String} and resolved as a field alias through the schema.
 *
 * @param schema     schema used for range checks and alias lookup; may be
 *                   null, in which case positions are accepted unchecked and
 *                   aliases cannot be resolved
 * @param rawColumns positions ({@code Integer}) and/or aliases ({@code String})
 * @return the resolved positions, in the order of {@code rawColumns}
 * @throws FrontendException with code 1127 if a position is out of range, or
 *         with code 1128 if an alias cannot be resolved
 */
private List<Integer> translateAliasToPos(LogicalSchema schema, List<Object> rawColumns) throws FrontendException {
    List<Integer> columns = new ArrayList<Integer>();
    for (Object rawColumn : rawColumns) {
        if (rawColumn instanceof Integer) {
            Integer index = (Integer) rawColumn;
            if (schema != null && (index >= schema.size() || index < 0)) {
                throw new FrontendException("Index " + rawColumn + " out of range in schema:" + schema.toString(false), 1127);
            }
            columns.add(index);
        } else {
            // An alias cannot be looked up without a schema; report it as an
            // unresolved field rather than failing with a NullPointerException.
            if (schema == null) {
                throw new FrontendException("Cannot find field " + rawColumn + " in null schema", 1128);
            }
            int pos = schema.getFieldPosition((String) rawColumn);
            if (pos == -1) {
                throw new FrontendException("Cannot find field " + rawColumn + " in " + schema.toString(false), 1128);
            }
            columns.add(pos);
        }
    }
    return columns;
}
Example 4
Source File: ColumnPruneHelper.java From spork with Apache License 2.0 | 6 votes |
@Override public void visit(LODistinct distinct) throws FrontendException { setOutputUids(distinct); Set<Long> input = new HashSet<Long>(); // Every field is required LogicalSchema s = distinct.getSchema(); if (s == null) { throw new SchemaNotDefinedException("Schema for " + distinct.getName() + " is not defined."); } for(int i=0; i<s.size(); i++) { input.add(s.getField(i).uid); } distinct.annotate(INPUTUIDS, input); }
Example 5
Source File: ColumnPruneVisitor.java From spork with Apache License 2.0 | 6 votes |
/**
 * Inserts a foreach after {@code op} that drops every schema column whose uid
 * is not in the operator's {@code OUTPUTUIDS} annotation. Does nothing when
 * the annotation is absent or no column needs to be dropped.
 *
 * @param op the operator whose unused output columns should be dropped
 * @throws FrontendException propagated from schema access or plan changes
 */
@SuppressWarnings("unchecked")
private void addForEachIfNecessary(LogicalRelationalOperator op) throws FrontendException {
    Set<Long> keptUids = (Set<Long>) op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS);
    if (keptUids == null) {
        return;
    }

    LogicalSchema opSchema = op.getSchema();
    Set<Integer> droppedColumns = new HashSet<Integer>();
    for (int col = 0; col < opSchema.size(); col++) {
        long uid = opSchema.getField(col).uid;
        if (!keptUids.contains(uid)) {
            droppedColumns.add(col);
        }
    }

    if (droppedColumns.isEmpty()) {
        return;
    }
    LOForEach inserted = Util.addForEachAfter((LogicalPlan) op.getPlan(), op, 0, droppedColumns);
    // Return value is discarded; presumably called so the new operator's
    // schema gets computed -- confirm against LOForEach.getSchema().
    inserted.getSchema();
}
Example 6
Source File: AddForEach.java From spork with Apache License 2.0 | 5 votes |
/**
 * Adds a foreach after {@code op} that drops every schema column whose uid is
 * not in the operator's {@code OUTPUTUIDS} annotation. Nothing is added when
 * no column needs to be dropped.
 *
 * The annotation and the schema are dereferenced without a null check.
 *
 * @param op the operator to prune
 * @throws FrontendException propagated from schema access or plan changes
 */
@SuppressWarnings("unchecked")
private void addForeach(LogicalRelationalOperator op) throws FrontendException {
    Set<Long> keptUids = (Set<Long>) op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS);
    LogicalSchema opSchema = op.getSchema();

    Set<Integer> droppedColumns = new HashSet<Integer>();
    for (int col = 0; col < opSchema.size(); col++) {
        long uid = opSchema.getField(col).uid;
        if (!keptUids.contains(uid)) {
            droppedColumns.add(col);
        }
    }

    if (droppedColumns.isEmpty()) {
        return;
    }
    Util.addForEachAfter((LogicalPlan) op.getPlan(), op, 0, droppedColumns);
}
Example 7
Source File: AddForEach.java From spork with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") // check if an LOForEach should be added after the logical operator private boolean shouldAdd(LogicalRelationalOperator op) throws FrontendException { if (op instanceof LOForEach) { return false; } Set<Long> outputUids = (Set<Long>)op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS); if (outputUids==null) return false; LogicalSchema schema = op.getSchema(); if (schema==null) return false; // check if there is already a foreach List<Operator> ll = op.getPlan().getSuccessors(op); if (ll != null && ll.get(0) instanceof LOForEach) { return false; } Set<Integer> columnsToDrop = new HashSet<Integer>(); for (int i=0;i<schema.size();i++) { if (!outputUids.contains(schema.getField(i).uid)) columnsToDrop.add(i); } if (!columnsToDrop.isEmpty()) return true; return false; }
Example 8
Source File: PartitionFilterOptimizer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Fills {@code colNameMap} and {@code reverseColNameMap} from the load
 * operator's two schemas.
 *
 * For each position of the determined schema, its alias is mapped to the alias
 * at the same position of the load's schema. When the load's schema has no
 * field at that position, the determined alias is mapped to itself. The
 * reverse map holds the inverse pairs.
 *
 * @throws FrontendException propagated from schema access
 */
protected void setupColNameMaps() throws FrontendException {
    LogicalSchema loLoadSchema = loLoad.getSchema();
    LogicalSchema loadFuncSchema = loLoad.getDeterminedSchema();

    for (int pos = 0; pos < loadFuncSchema.size(); pos++) {
        String determinedAlias = loadFuncSchema.getField(pos).alias;

        String loadAlias;
        if (pos < loLoadSchema.size()) {
            loadAlias = loLoadSchema.getField(pos).alias;
        } else {
            loadAlias = determinedAlias;
        }

        colNameMap.put(determinedAlias, loadAlias);
        reverseColNameMap.put(loadAlias, determinedAlias);
    }
}
Example 9
Source File: PigTypes.java From calcite with Apache License 2.0 | 5 votes |
/**
 * Converts a Pig tuple schema to a SQL row type.
 *
 * A null or empty schema yields a {@code DynamicTupleRecordType}. Otherwise a
 * struct type is built with one field per Pig field; a field without an alias
 * is named {@code "$" + position}.
 *
 * @param pigSchema Pig tuple schema
 * @param nullable true if the type is nullable
 * @return a SQL row type
 */
static RelDataType convertSchema(LogicalSchema pigSchema, boolean nullable) {
    if (pigSchema == null || pigSchema.size() <= 0) {
        return new DynamicTupleRecordType(TYPE_FACTORY);
    }

    List<String> names = new ArrayList<>();
    List<RelDataType> types = new ArrayList<>();
    for (int pos = 0; pos < pigSchema.size(); pos++) {
        final LogicalSchema.LogicalFieldSchema field = pigSchema.getField(pos);
        String name = field.alias;
        if (name == null) {
            name = "$" + pos;
        }
        names.add(name);
        types.add(convertSchemaField(field, nullable));
    }
    return TYPE_FACTORY.createStructType(types, names, nullable);
}
Example 10
Source File: TypeCastInserter.java From spork with Apache License 2.0 | 5 votes |
private boolean atLeastOneCastNeeded(LogicalSchema determinedSchema, LogicalSchema s) { for (int i = 0; i < s.size(); i++) { LogicalSchema.LogicalFieldSchema fs = s.getField(i); if (fs.type != DataType.BYTEARRAY && (determinedSchema == null || (!fs.isEqual(determinedSchema.getField(i))))) { // we have to cast this field from the default BYTEARRAY type to // whatever the user specified in the 'AS' clause of the LOAD // statement (the fs.type). return true; } } return false; }
Example 11
Source File: ColumnPruneHelper.java From spork with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") // Get output uid from output schema. If output schema does not exist, // throw exception private Set<Long> setOutputUids(LogicalRelationalOperator op) throws FrontendException { List<Operator> ll = plan.getSuccessors(op); Set<Long> uids = new HashSet<Long>(); LogicalSchema s = op.getSchema(); if (s == null) { throw new SchemaNotDefinedException("Schema for " + op.getName() + " is not defined."); } if (ll != null) { // if this is not sink, the output uids are union of input uids of its successors for(Operator succ: ll) { Set<Long> inputUids = (Set<Long>)succ.getAnnotation(INPUTUIDS); if (inputUids != null) { Iterator<Long> iter = inputUids.iterator(); while(iter.hasNext()) { long uid = iter.next(); if (s.findField(uid) != -1) { uids.add(uid); } } } } } else { // if it's leaf, set to its schema for(int i=0; i<s.size(); i++) { uids.add(s.getField(i).uid); } } op.annotate(OUTPUTUIDS, uids); return uids; }
Example 12
Source File: FilterAboveForeach.java From spork with Apache License 2.0 | 5 votes |
/** * Get all uids from Projections of this FilterOperator * @param filter * @return Set of uid */ private Pair<List<Long>, List<Byte>> getFilterProjectionUids(LOFilter filter) throws FrontendException { List<Long> uids = new ArrayList<Long>(); List<Byte> types = new ArrayList<Byte>(); if( filter != null ) { LogicalExpressionPlan filterPlan = filter.getFilterPlan(); Iterator<Operator> iter = filterPlan.getOperators(); Operator op = null; while( iter.hasNext() ) { op = iter.next(); if( op instanceof ProjectExpression ) { ProjectExpression proj = (ProjectExpression)op; if( proj.isProjectStar() ) { //project-range is always expanded when schema is //available, so nothing to do here for it LogicalRelationalOperator pred = (LogicalRelationalOperator)filter.getPlan().getPredecessors(filter).get(0); LogicalSchema predSchema = pred.getSchema(); if (predSchema!=null) { for (int i=0;i<predSchema.size();i++) { uids.add(predSchema.getField(i).uid); types.add(predSchema.getField(i).type); } } } else { uids.add(proj.getFieldSchema().uid); types.add(proj.getFieldSchema().type); } } } } Pair<List<Long>, List<Byte>> result = new Pair<List<Long>, List<Byte>>(uids, types); return result; }
Example 13
Source File: PredicatePushdownOptimizer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Fills {@code colNameMap} and {@code reverseColNameMap} from the load
 * operator's two schemas.
 *
 * For each position of the determined schema, its alias is mapped to the alias
 * at the same position of the load's schema. When the load's schema has no
 * field at that position, the determined alias is mapped to itself. The
 * reverse map holds the inverse pairs.
 *
 * @throws FrontendException propagated from schema access
 */
protected void setupColNameMaps() throws FrontendException {
    LogicalSchema loLoadSchema = loLoad.getSchema();
    LogicalSchema loadFuncSchema = loLoad.getDeterminedSchema();

    for (int pos = 0; pos < loadFuncSchema.size(); pos++) {
        String determinedAlias = loadFuncSchema.getField(pos).alias;

        String loadAlias;
        if (pos < loLoadSchema.size()) {
            loadAlias = loLoadSchema.getField(pos).alias;
        } else {
            loadAlias = determinedAlias;
        }

        colNameMap.put(determinedAlias, loadAlias);
        reverseColNameMap.put(loadAlias, determinedAlias);
    }
}
Example 14
Source File: LineageFindRelVisitor.java From spork with Apache License 2.0 | 5 votes |
/** * Find single load func spec associated with this relation. * If the relation has schema, all uids in schema should be associated * with same load func spec. if it does not have schema check the existing * mapping * @param relOp * @return * @throws FrontendException */ private FuncSpec getAssociatedLoadFunc(LogicalRelationalOperator relOp) throws FrontendException { LogicalSchema schema = relOp.getSchema(); FuncSpec funcSpec = null; if(schema != null){ if(schema.size() == 0) return null; funcSpec = uid2LoadFuncMap.get(schema.getField(0).uid); if(funcSpec != null) { for(int i=1; i<schema.size(); i++){ LogicalFieldSchema fs = schema.getField(i); if(! haveIdenticalCasters(funcSpec, uid2LoadFuncMap.get(fs.uid))){ //all uid are not associated with same func spec, there is no // single func spec that represents all the fields funcSpec = null; break; } } } } if(funcSpec == null){ // If relOp is LOForEach and contains UDF, byte field could come from UDF. // We don't assume it share the LoadCaster with predecessor if (relOp instanceof LOForEach) { UDFFinder udfFinder = new UDFFinder(((LOForEach) relOp).getInnerPlan()); udfFinder.visit(); if (udfFinder.getUDFList().size()!=0) return null; } funcSpec = rel2InputFuncMap.get(relOp); } return funcSpec; }
Example 15
Source File: TypeCastInserter.java From spork with Apache License 2.0 | 5 votes |
@Override public boolean check(OperatorPlan matched) throws FrontendException { LogicalRelationalOperator op = (LogicalRelationalOperator)matched.getSources().get(0); LogicalSchema s = op.getSchema(); if (s == null) return false; // only process each node once if (isCastAdjusted(op)) return false; if (op instanceof LOLoad) { if (((LOLoad)op).getScriptSchema()==null) return false; } else { if (((LOStream)op).getScriptSchema()==null) return false; } // Now that we've narrowed it down to an operation that *can* have casts added, // (because the user specified some types which might not match the data) let's // see if they're actually needed: LogicalSchema determinedSchema = determineSchema(op); if(atLeastOneCastNeeded(determinedSchema, s)) { return true; } if(determinedSchema == null || determinedSchema.size() != s.size()) { // we don't know what the data looks like, but the user has specified // that they want a certain number of fields loaded. We'll use a // projection (or pruning) to make sure the columns show up (with NULL // values) or are truncated from the right hand side of the input data. return true; } return false; }
Example 16
Source File: ProjectExpression.java From spork with Apache License 2.0 | 5 votes |
/**
 * Resolves the column number of this projection in the schema of its input.
 *
 * With a non-null alias, the alias is looked up in the input schema. With a
 * null alias, the projection's own column number is returned after checking it
 * against the upper bound of the input schema, when that schema is known.
 *
 * @param alias the field alias to resolve, or null to use the column number
 * @return the column position
 * @throws FrontendException as {@code PlanValidationException} with code 1025
 *         if the alias cannot be found, or with code 1000 if the column number
 *         is beyond the input schema
 */
private int findColNum(String alias) throws FrontendException {
    LogicalPlan lp = (LogicalPlan) attachedRelationalOp.getPlan();
    List<Operator> inputs = lp.getPredecessors(attachedRelationalOp);
    LogicalRelationalOperator input = (LogicalRelationalOperator) inputs.get(getInputNum());
    LogicalSchema inputSchema = input.getSchema();

    if (alias == null) {
        int col = getColNum();
        if (inputSchema != null && col >= inputSchema.size()) {
            throw new PlanValidationException(this,
                    "Out of bound access. Trying to access non-existent column: " + col + ". Schema "
                            + inputSchema.toString(false) + " has " + inputSchema.size() + " column(s).",
                    1000);
        }
        return col;
    }

    int colNum = -1;
    if (inputSchema != null) {
        colNum = inputSchema.getFieldPosition(alias);
    }
    if (colNum != -1) {
        return colNum;
    }

    String msg = "Invalid field projection. Projected field [" + alias + "] does not exist";
    if (inputSchema != null) {
        msg += " in schema: " + inputSchema.toString(false);
    }
    msg += ".";
    throw new PlanValidationException(this, msg, 1025);
}
Example 17
Source File: ColumnPruneHelper.java From spork with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") public boolean check() throws FrontendException { List<Operator> sources = currentPlan.getSources(); // if this rule has run before, just return false if (sources.size() > 1 && sources.get(0).getAnnotation(INPUTUIDS) != null) { clearAnnotation(); return false; } // create sub-plan that ends with foreach subPlan = getSubPlan(); if (subPlan.size() == 0) { clearAnnotation(); return false; } ColumnDependencyVisitor v = new ColumnDependencyVisitor(currentPlan); try { v.visit(); }catch(SchemaNotDefinedException e) { // if any operator has an unknown schema, just return false clearAnnotation(); return false; } List<Operator> ll = subPlan.getSources(); boolean found = false; for(Operator op: ll) { if (op instanceof LOLoad) { Set<Long> uids = (Set<Long>)op.getAnnotation(INPUTUIDS); LogicalSchema s = ((LOLoad) op).getSchema(); Set<Integer> required = getColumns(s, uids); if (required.size() < s.size()) { op.annotate(REQUIREDCOLS, required); found = true; } } } if (!found) clearAnnotation(); return found; }
Example 18
Source File: ProjectStarExpanderUtil.java From spork with Apache License 2.0 | 4 votes |
/** * If the argument project is a project-star or project-range that * can be expanded, find the position of first and last columns * it should project * @param expPlan * @param proj * @return pair that has the first and last columns that need to be projected * @throws FrontendException */ static Pair<Integer, Integer> getProjectStartEndCols( LogicalExpressionPlan expPlan, ProjectExpression proj) throws FrontendException { // get the input schema first LogicalRelationalOperator relOp = proj.getAttachedRelationalOp(); // list of inputs of attached relation List<Operator> inputRels = relOp.getPlan().getPredecessors(relOp); //the relation that is input to this project LogicalRelationalOperator inputRel = (LogicalRelationalOperator) inputRels.get(proj.getInputNum()); LogicalSchema inputSchema = inputRel.getSchema(); if(inputSchema == null && (proj.isProjectStar() || (proj.isRangeProject() && proj.getEndCol() == -1)) ){ // can't expand if input schema is null and it is a project-star // or project-range-until-end return null; } //find first and last column in input schema to be projected int firstProjCol; int lastProjCol; //the range values are set in the project in LOInnerLoad if(proj.isRangeProject()){ proj.setColumnNumberFromAlias(); firstProjCol = proj.getStartCol(); if(proj.getEndCol() >= 0) lastProjCol = proj.getEndCol(); else lastProjCol = inputSchema.size() - 1; }else{ //project-star firstProjCol = 0; lastProjCol = inputSchema.size() - 1; } return new Pair<Integer, Integer>(firstProjCol, lastProjCol); }
Example 19
Source File: Util.java From spork with Apache License 2.0 | 4 votes |
public static LOForEach addForEachAfter(LogicalPlan plan, LogicalRelationalOperator op, int branch, Set<Integer> columnsToDrop) throws FrontendException { LOForEach foreach = new LOForEach(plan); plan.add(foreach); List<Operator> next = plan.getSuccessors(op); if (next != null) { LogicalRelationalOperator nextOp = (LogicalRelationalOperator)next.get(branch); plan.insertBetween(op, foreach, nextOp); foreach.setAlias(op.getAlias()); } else { plan.connect(op, foreach); } LogicalPlan innerPlan = new LogicalPlan(); foreach.setInnerPlan(innerPlan); LogicalSchema schema = op.getSchema(); // build foreach inner plan List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>(); LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[schema.size()-columnsToDrop.size()]); innerPlan.add(gen); for (int i=0, j=0; i<schema.size(); i++) { if (columnsToDrop.contains(i)) { continue; } LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, i); innerPlan.add(innerLoad); innerPlan.connect(innerLoad, gen); LogicalExpressionPlan exp = new LogicalExpressionPlan(); ProjectExpression prj = new ProjectExpression(exp, j++, -1, gen); exp.add(prj); exps.add(exp); } return foreach; }