Class SQLUtils

Object
org.apache.spark.sql.api.r.SQLUtils

public class SQLUtils extends Object
  • Constructor Details

    • SQLUtils

      public SQLUtils()
  • Method Details

    • getOrCreateSparkSession

      public static SparkSession getOrCreateSparkSession(JavaSparkContext jsc, Map<Object,Object> sparkConfigMap, boolean enableHiveSupport)
    • setSparkContextSessionConf

      public static void setSparkContextSessionConf(SparkSession spark, Map<Object,Object> sparkConfigMap)
    • getSessionConf

      public static Map<String,String> getSessionConf(SparkSession spark)
    • getJavaSparkContext

      public static JavaSparkContext getJavaSparkContext(SparkSession spark)
    • createStructType

      public static StructType createStructType(scala.collection.immutable.Seq<StructField> fields)
    • createStructField

      public static StructField createStructField(String name, String dataType, boolean nullable)
    • createDF

      public static Dataset<Row> createDF(RDD<byte[]> rdd, StructType schema, SparkSession sparkSession)
    • dfToRowRDD

      public static JavaRDD<byte[]> dfToRowRDD(Dataset<Row> df)
    • SERIALIZED_R_DATA_SCHEMA

      public static StructType SERIALIZED_R_DATA_SCHEMA()
    • dapply

      public static Dataset<Row> dapply(Dataset<Row> df, byte[] func, byte[] packageNames, Object[] broadcastVars, StructType schema)
      Helper function that backs dapply() on the R side.
      Parameters:
      df - (undocumented)
      func - (undocumented)
      packageNames - (undocumented)
      broadcastVars - (undocumented)
      schema - (undocumented)
      Returns:
      (undocumented)
    • gapply

      public static Dataset<Row> gapply(RelationalGroupedDataset gd, byte[] func, byte[] packageNames, Object[] broadcastVars, StructType schema)
      Helper function that backs gapply() on the R side.
      Parameters:
      gd - (undocumented)
      func - (undocumented)
      packageNames - (undocumented)
      broadcastVars - (undocumented)
      schema - (undocumented)
      Returns:
      (undocumented)
    • dfToCols

      public static Object[][] dfToCols(Dataset<Row> df)
    • readSqlObject

      public static Object readSqlObject(DataInputStream dis, char dataType)
    • writeSqlObject

      public static boolean writeSqlObject(DataOutputStream dos, Object obj)
    • getTableNames

      public static String[] getTableNames(SparkSession sparkSession, String databaseName)
    • createArrayType

      public static ArrayType createArrayType(String elementType)
    • readArrowStreamFromFile

      public static JavaRDD<byte[]> readArrowStreamFromFile(SparkSession sparkSession, String filename)
      R-callable function that reads a file in Arrow stream format and creates an RDD in which each serialized ArrowRecordBatch forms one partition.
      Parameters:
      sparkSession - (undocumented)
      filename - the file to read, in Arrow stream format
      Returns:
      a JavaRDD<byte[]> in which each serialized ArrowRecordBatch forms one partition
    • toDataFrame

      public static Dataset<Row> toDataFrame(JavaRDD<byte[]> arrowBatchRDD, StructType schema, SparkSession sparkSession)
      R-callable function that creates a DataFrame from a JavaRDD of serialized ArrowRecordBatches.
      Parameters:
      arrowBatchRDD - the JavaRDD of serialized ArrowRecordBatches to convert
      schema - (undocumented)
      sparkSession - (undocumented)
      Returns:
      the DataFrame (Dataset<Row>) created from the serialized ArrowRecordBatches
    • org$apache$spark$internal$Logging$$log_

      public static org.slf4j.Logger org$apache$spark$internal$Logging$$log_()
    • org$apache$spark$internal$Logging$$log__$eq

      public static void org$apache$spark$internal$Logging$$log__$eq(org.slf4j.Logger x$1)
    • LogStringContext

      public static org.apache.spark.internal.Logging.LogStringContext LogStringContext(scala.StringContext sc)