org.apache.spark.sql.catalyst.plans.JoinType Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.plans.JoinType.
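Before the project-specific examples, the sketch below (not taken from any of the projects listed here) shows the two most common ways JoinType is consumed: parsing a SQL-style join name with JoinType(...) and pattern matching on the case objects Inner, LeftOuter, RightOuter and FullOuter. The JoinTypeDemo object and the describeJoin helper are hypothetical names introduced for illustration, and the snippet assumes a recent Spark version on the classpath.

import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, JoinType, LeftOuter, RightOuter}

object JoinTypeDemo {

  // JoinType(...) parses SQL-style names such as "inner", "left_outer" or "full_outer".
  val parsed: JoinType = JoinType("left_outer")

  // Hypothetical helper: physical operators typically branch on the JoinType case objects,
  // much like the outputPartitioning and doExecute methods in the examples below.
  def describeJoin(joinType: JoinType): String = joinType match {
    case Inner => "keep only matching rows"
    case LeftOuter => "keep all left rows, null-fill unmatched right rows"
    case RightOuter => "keep all right rows, null-fill unmatched left rows"
    case FullOuter => "keep all rows from both sides"
    case other => s"other join type: $other"
  }

  def main(args: Array[String]): Unit = {
    println(describeJoin(parsed)) // "left_outer" parses to LeftOuter
  }
}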
Example 1
Source File: basicOperators.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.mv.plans.modular

import org.apache.spark.sql.catalyst.expressions.{Attribute, _}
import org.apache.spark.sql.catalyst.plans.JoinType

import org.apache.carbondata.mv.plans._
import org.apache.carbondata.mv.plans.modular.Flags._

trait Matchable extends ModularPlan {
  def outputList: Seq[NamedExpression]

  def predicateList: Seq[Expression]
}

case class GroupBy(
    outputList: Seq[NamedExpression],
    inputList: Seq[Expression],
    predicateList: Seq[Expression],
    alias: Option[String],
    child: ModularPlan,
    flags: FlagSet,
    flagSpec: Seq[Seq[Any]],
    modularPlan: Option[ModularPlan] = None) extends UnaryNode with Matchable {

  override def output: Seq[Attribute] = outputList.map(_.toAttribute)

  override def makeCopy(newArgs: Array[AnyRef]): GroupBy = {
    val groupBy = super.makeCopy(newArgs).asInstanceOf[GroupBy]
    if (rewritten) groupBy.setRewritten()
    groupBy
  }
}

case class Select(
    outputList: Seq[NamedExpression],
    inputList: Seq[Expression],
    predicateList: Seq[Expression],
    aliasMap: Map[Int, String],
    joinEdges: Seq[JoinEdge],
    children: Seq[ModularPlan],
    flags: FlagSet,
    flagSpec: Seq[Seq[Any]],
    windowSpec: Seq[Seq[Any]],
    modularPlan: Option[ModularPlan] = None) extends ModularPlan with Matchable {

  override def output: Seq[Attribute] = outputList.map(_.toAttribute)

  override def adjacencyList: scala.collection.immutable.Map[Int, Seq[(Int, JoinType)]] = {
    joinEdges.groupBy { _.left }.map { case (k, v) => (k, v.map(e => (e.right, e.joinType))) }
  }

  override def extractJoinConditions(
      left: ModularPlan,
      right: ModularPlan): Seq[Expression] = {
    predicateList.filter(p =>
      p.references.intersect(left.outputSet).nonEmpty &&
      p.references.intersect(right.outputSet).nonEmpty &&
      p.references.subsetOf(left.outputSet ++ right.outputSet))
  }

  override def extractRightEvaluableConditions(
      left: ModularPlan,
      right: ModularPlan): Seq[Expression] = {
    predicateList.filter(p =>
      p.references.subsetOf(left.outputSet ++ right.outputSet) &&
      p.references.intersect(right.outputSet).nonEmpty)
  }

  override def extractEvaluableConditions(plan: ModularPlan): Seq[Expression] = {
    predicateList.filter(p => canEvaluate(p, plan))
  }

  override def makeCopy(newArgs: Array[AnyRef]): Select = {
    val select = super.makeCopy(newArgs).asInstanceOf[Select]
    if (rewritten) select.setRewritten()
    select
  }
}

case class Union(children: Seq[ModularPlan], flags: FlagSet, flagSpec: Seq[Seq[Any]])
  extends ModularPlan {
  override def output: Seq[Attribute] = children.head.output
}

case object OneRowTable extends LeafNode {
  override def output: Seq[Attribute] = Nil
}
Example 2
Source File: ShuffledHashOuterJoin.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.joins

import scala.collection.JavaConversions._

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical._
import org.apache.spark.sql.catalyst.plans.{FullOuter, JoinType, LeftOuter, RightOuter}
import org.apache.spark.sql.execution.{BinaryNode, SparkPlan}
import org.apache.spark.sql.execution.metric.SQLMetrics

@DeveloperApi
case class ShuffledHashOuterJoin(
    leftKeys: Seq[Expression],
    rightKeys: Seq[Expression],
    joinType: JoinType,
    condition: Option[Expression],
    left: SparkPlan,
    right: SparkPlan) extends BinaryNode with HashOuterJoin {

  override private[sql] lazy val metrics = Map(
    "numLeftRows" -> SQLMetrics.createLongMetric(sparkContext, "number of left rows"),
    "numRightRows" -> SQLMetrics.createLongMetric(sparkContext, "number of right rows"),
    "numOutputRows" -> SQLMetrics.createLongMetric(sparkContext, "number of output rows"))

  override def requiredChildDistribution: Seq[Distribution] =
    ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil

  override def outputPartitioning: Partitioning = joinType match {
    case LeftOuter => left.outputPartitioning
    case RightOuter => right.outputPartitioning
    case FullOuter => UnknownPartitioning(left.outputPartitioning.numPartitions)
    case x =>
      throw new IllegalArgumentException(s"HashOuterJoin should not take $x as the JoinType")
  }

  protected override def doExecute(): RDD[InternalRow] = {
    val numLeftRows = longMetric("numLeftRows")
    val numRightRows = longMetric("numRightRows")
    val numOutputRows = longMetric("numOutputRows")

    val joinedRow = new JoinedRow()
    left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) =>
      // TODO this probably can be replaced by external sort (sort merged join?)
      joinType match {
        case LeftOuter =>
          val hashed = HashedRelation(rightIter, numRightRows, buildKeyGenerator)
          val keyGenerator = streamedKeyGenerator
          val resultProj = resultProjection
          leftIter.flatMap(currentRow => {
            numLeftRows += 1
            val rowKey = keyGenerator(currentRow)
            joinedRow.withLeft(currentRow)
            leftOuterIterator(rowKey, joinedRow, hashed.get(rowKey), resultProj, numOutputRows)
          })

        case RightOuter =>
          val hashed = HashedRelation(leftIter, numLeftRows, buildKeyGenerator)
          val keyGenerator = streamedKeyGenerator
          val resultProj = resultProjection
          rightIter.flatMap(currentRow => {
            numRightRows += 1
            val rowKey = keyGenerator(currentRow)
            joinedRow.withRight(currentRow)
            rightOuterIterator(rowKey, hashed.get(rowKey), joinedRow, resultProj, numOutputRows)
          })

        case FullOuter =>
          // TODO(davies): use UnsafeRow
          val leftHashTable =
            buildHashTable(leftIter, numLeftRows, newProjection(leftKeys, left.output))
          val rightHashTable =
            buildHashTable(rightIter, numRightRows, newProjection(rightKeys, right.output))
          (leftHashTable.keySet ++ rightHashTable.keySet).iterator.flatMap { key =>
            fullOuterIterator(key,
              leftHashTable.getOrElse(key, EMPTY_LIST),
              rightHashTable.getOrElse(key, EMPTY_LIST),
              joinedRow,
              numOutputRows)
          }

        case x =>
          throw new IllegalArgumentException(
            s"ShuffledHashOuterJoin should not take $x as the JoinType")
      }
    }
  }
}
Example 3
Source File: StarryHashJoinExec.scala From starry with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.joins

import com.github.passionke.starry.SparkPlanExecutor
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.execution.metric.SQLMetrics
import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan}

case class StarryHashJoinExec(
    leftKeys: Seq[Expression],
    rightKeys: Seq[Expression],
    joinType: JoinType,
    buildSide: BuildSide,
    condition: Option[Expression],
    left: SparkPlan,
    right: SparkPlan) extends BinaryExecNode with HashJoin {

  override lazy val metrics = Map(
    "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
    "avgHashProbe" -> SQLMetrics.createAverageMetric(sparkContext, "avg hash probe"))

  protected override def doExecute(): RDD[InternalRow] = {
    val numOutputRows = longMetric("numOutputRows")
    val avgHashProbe = longMetric("avgHashProbe")
    val rows = SparkPlanExecutor.doExec(buildPlan)
    val hashed = HashedRelation(rows.iterator, buildKeys, rows.length, null)
    streamedPlan.execute().mapPartitions { streamedIter =>
      join(streamedIter, hashed, numOutputRows, avgHashProbe)
    }
  }
}