org.apache.spark.sql.parquet

InsertIntoParquetTable

case class InsertIntoParquetTable(relation: ParquetRelation, child: SparkPlan, overwrite: Boolean = false)(sqlContext: SQLContext) extends SparkPlan with UnaryNode with SparkHadoopMapReduceUtil with Product with Serializable

Operator that acts as a sink for queries on RDDs and can be used to store the output inside a directory of Parquet files. This operator is similar to Hive's INSERT INTO TABLE operation in the sense that one can choose to either overwrite or append to a directory. Note that consecutive insertions to the same table must have compatible (source) schemas.

WARNING: EXPERIMENTAL! InsertIntoParquetTable with overwrite=false may cause data corruption if multiple users try to append to the same table simultaneously. Inserting into a table that was previously generated by other means (e.g., by creating an HDFS directory and importing Parquet files generated by other tools) may cause unpredictable behaviour and therefore results in a RuntimeException (this is only detected via a filename pattern, so not all such cases will be caught).

Linear Supertypes
Serializable, Serializable, Product, Equals, SparkHadoopMapReduceUtil, UnaryNode, UnaryNode[SparkPlan], SparkPlan, com.typesafe.scalalogging.slf4j.Logging, QueryPlan[SparkPlan], TreeNode[SparkPlan], AnyRef, Any
Ordering
  1. Alphabetic
  2. By inheritance
Inherited
  1. InsertIntoParquetTable
  2. Serializable
  3. Serializable
  4. Product
  5. Equals
  6. SparkHadoopMapReduceUtil
  7. UnaryNode
  8. UnaryNode
  9. SparkPlan
  10. Logging
  11. QueryPlan
  12. TreeNode
  13. AnyRef
  14. Any
  1. Hide All
  2. Show all
Learn more about member selection
Visibility
  1. Public
  2. All

Instance Constructors

  1. new InsertIntoParquetTable(relation: ParquetRelation, child: SparkPlan, overwrite: Boolean = false)(sqlContext: SQLContext)

Value Members

  1. final def !=(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  2. final def !=(arg0: Any): Boolean

    Definition Classes
    Any
  3. final def ##(): Int

    Definition Classes
    AnyRef → Any
  4. final def ==(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  5. final def ==(arg0: Any): Boolean

    Definition Classes
    Any
  6. def apply(number: Int): SparkPlan

    Definition Classes
    TreeNode
  7. def argString: String

    Definition Classes
    TreeNode
  8. def asCode: String

    Definition Classes
    TreeNode
  9. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  10. def buildRow(values: Seq[Any]): Row

    Attributes
    protected
    Definition Classes
    SparkPlan
  11. val child: SparkPlan

    Definition Classes
    InsertIntoParquetTable → UnaryNode
  12. def children: List[SparkPlan]

    Definition Classes
    UnaryNode
  13. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  14. def collect[B](pf: PartialFunction[SparkPlan, B]): Seq[B]

    Definition Classes
    TreeNode
  15. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  16. def execute(): RDD[Row]

    Inserts all rows into the Parquet file.

    Inserts all rows into the Parquet file.

    Definition Classes
    InsertIntoParquetTable → SparkPlan
  17. def executeCollect(): Array[Row]

    Runs this query returning the result as an array.

    Runs this query returning the result as an array.

    Definition Classes
    SparkPlan
  18. def expressions: Seq[Expression]

    Definition Classes
    QueryPlan
  19. def fastEquals(other: TreeNode[_]): Boolean

    Definition Classes
    TreeNode
  20. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  21. def flatMap[A](f: (SparkPlan) ⇒ TraversableOnce[A]): Seq[A]

    Definition Classes
    TreeNode
  22. def foreach(f: (SparkPlan) ⇒ Unit): Unit

    Definition Classes
    TreeNode
  23. def generateSchemaString(schema: StructType, prefix: String, builder: StringBuilder): StringBuilder

    Attributes
    protected
    Definition Classes
    QueryPlan
  24. def generateSchemaString(schema: Seq[Attribute]): String

    Attributes
    protected
    Definition Classes
    QueryPlan
  25. def generateTreeString(depth: Int, builder: StringBuilder): StringBuilder

    Attributes
    protected
    Definition Classes
    TreeNode
  26. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  27. def getNodeNumbered(number: MutableInt): SparkPlan

    Attributes
    protected
    Definition Classes
    TreeNode
  28. val id: Long

    Definition Classes
    TreeNode
  29. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  30. lazy val logger: Logger

    Attributes
    protected
    Definition Classes
    Logging
  31. def makeCopy(newArgs: Array[AnyRef]): InsertIntoParquetTable.this.type

    Definition Classes
    TreeNode
  32. def map[A](f: (SparkPlan) ⇒ A): Seq[A]

    Definition Classes
    TreeNode
  33. def mapChildren(f: (SparkPlan) ⇒ SparkPlan): InsertIntoParquetTable.this.type

    Definition Classes
    TreeNode
  34. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  35. def newJobContext(conf: Configuration, jobId: JobID): JobContext

    Definition Classes
    SparkHadoopMapReduceUtil
  36. def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext

    Definition Classes
    SparkHadoopMapReduceUtil
  37. def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int): TaskAttemptID

    Definition Classes
    SparkHadoopMapReduceUtil
  38. def nodeName: String

    Definition Classes
    TreeNode
  39. final def notify(): Unit

    Definition Classes
    AnyRef
  40. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  41. def numberedTreeString: String

    Definition Classes
    TreeNode
  42. def otherCopyArgs: List[SQLContext]

    Definition Classes
    InsertIntoParquetTable → TreeNode
  43. def output: Seq[Attribute]

    Definition Classes
    InsertIntoParquetTable → QueryPlan
  44. def outputPartitioning: Partitioning

    Specifies how data is partitioned across different nodes in the cluster.

    Specifies how data is partitioned across different nodes in the cluster.

    Definition Classes
    UnaryNode → SparkPlan
  45. def outputSet: Set[Attribute]

    Definition Classes
    QueryPlan
  46. val overwrite: Boolean

  47. def printSchema(): Unit

    Definition Classes
    QueryPlan
  48. val relation: ParquetRelation

  49. def requiredChildDistribution: Seq[Distribution]

    Specifies any partition requirements on the input data for this operator.

    Specifies any partition requirements on the input data for this operator.

    Definition Classes
    SparkPlan
  50. def sameInstance(other: TreeNode[_]): Boolean

    Definition Classes
    TreeNode
  51. def schemaString: String

    Definition Classes
    QueryPlan
  52. def simpleString: String

    Definition Classes
    TreeNode
  53. val sqlContext: SQLContext

  54. def stringArgs: Iterator[Any]

    Attributes
    protected
    Definition Classes
    TreeNode
  55. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  56. def toString(): String

    Definition Classes
    TreeNode → AnyRef → Any
  57. def transform(rule: PartialFunction[SparkPlan, SparkPlan]): SparkPlan

    Definition Classes
    TreeNode
  58. def transformAllExpressions(rule: PartialFunction[Expression, Expression]): InsertIntoParquetTable.this.type

    Definition Classes
    QueryPlan
  59. def transformChildrenDown(rule: PartialFunction[SparkPlan, SparkPlan]): InsertIntoParquetTable.this.type

    Definition Classes
    TreeNode
  60. def transformChildrenUp(rule: PartialFunction[SparkPlan, SparkPlan]): InsertIntoParquetTable.this.type

    Definition Classes
    TreeNode
  61. def transformDown(rule: PartialFunction[SparkPlan, SparkPlan]): SparkPlan

    Definition Classes
    TreeNode
  62. def transformExpressions(rule: PartialFunction[Expression, Expression]): InsertIntoParquetTable.this.type

    Definition Classes
    QueryPlan
  63. def transformExpressionsDown(rule: PartialFunction[Expression, Expression]): InsertIntoParquetTable.this.type

    Definition Classes
    QueryPlan
  64. def transformExpressionsUp(rule: PartialFunction[Expression, Expression]): InsertIntoParquetTable.this.type

    Definition Classes
    QueryPlan
  65. def transformUp(rule: PartialFunction[SparkPlan, SparkPlan]): SparkPlan

    Definition Classes
    TreeNode
  66. def treeString: String

    Definition Classes
    TreeNode
  67. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  68. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  69. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  70. def withNewChildren(newChildren: Seq[SparkPlan]): InsertIntoParquetTable.this.type

    Definition Classes
    TreeNode

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from SparkHadoopMapReduceUtil

Inherited from UnaryNode

Inherited from UnaryNode[SparkPlan]

Inherited from SparkPlan

Inherited from com.typesafe.scalalogging.slf4j.Logging

Inherited from QueryPlan[SparkPlan]

Inherited from TreeNode[SparkPlan]

Inherited from AnyRef

Inherited from Any

Ungrouped