public class OrcFileFormat extends Object implements org.apache.spark.sql.execution.datasources.FileFormat, DataSourceRegister, scala.Serializable

FileFormat for reading ORC files. If this is moved or renamed, please update DataSource's backwardCompatibilityMap.

| Constructor and Description |
|---|
| OrcFileFormat() |
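This page itself shows no end-to-end usage. As a minimal, hedged sketch (the paths, app name, and object name below are made up and not part of this API doc), the format is normally exercised through the public DataFrameReader/DataFrameWriter API rather than by constructing OrcFileFormat directly:

```scala
import org.apache.spark.sql.SparkSession

object OrcReadWriteExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("orc-example").master("local[*]").getOrCreate()
    import spark.implicits._

    // Write a small DataFrame as ORC files (hypothetical output path).
    val people = Seq(("alice", 29), ("bob", 41)).toDF("name", "age")
    people.write.mode("overwrite").orc("/tmp/people.orc")

    // Read the files back; the schema comes from the ORC file footers.
    val loaded = spark.read.orc("/tmp/people.orc")
    loaded.show()

    spark.stop()
  }
}
```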
| Modifier and Type | Method and Description |
|---|---|
| static void | addSparkVersionMetadata(org.apache.hadoop.mapred.RecordWriter<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Writable> recordWriter) Add a metadata specifying Spark version. |
| scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> | buildReader(SparkSession sparkSession, StructType dataSchema, StructType partitionSchema, StructType requiredSchema, scala.collection.Seq<Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf) |
| static scala.collection.immutable.Map<String,String> | extensionsForCompressionCodecNames() |
| scala.Option<StructType> | inferSchema(SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files) |
| static DataType | inspectorToDataType(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector) |
| boolean | isSplitable(SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.fs.Path path) |
| static DataType | javaTypeToDataType(java.lang.reflect.Type clz) |
| static void | org$apache$spark$internal$Logging$$log__$eq(org.slf4j.Logger x$1) |
| static org.slf4j.Logger | org$apache$spark$internal$Logging$$log_() |
| org.apache.spark.sql.execution.datasources.OutputWriterFactory | prepareWrite(SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, StructType dataSchema) |
| static void | setRequiredColumns(org.apache.hadoop.conf.Configuration conf, StructType dataSchema, StructType requestedSchema) |
| String | shortName() The string that represents the format that this data source provider uses. |
| boolean | supportDataType(DataType dataType) |
| static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector | toInspector(DataType dataType) |
| static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector | toInspector(org.apache.spark.sql.catalyst.expressions.Expression expr) |
| String | toString() |
| static HiveInspectors.typeInfoConversions | typeInfoConversions(DataType dt) |
| static scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow> | unwrapOrcStructs(org.apache.hadoop.conf.Configuration conf, StructType dataSchema, StructType requiredSchema, scala.Option<org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector> maybeStructOI, scala.collection.Iterator<org.apache.hadoop.io.Writable> iterator) |
| static scala.Function1<Object,Object> | unwrapperFor(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector objectInspector) |
| static scala.Function3<Object,org.apache.spark.sql.catalyst.InternalRow,Object,scala.runtime.BoxedUnit> | unwrapperFor(org.apache.hadoop.hive.serde2.objectinspector.StructField field) |
| static Object[] | wrap(org.apache.spark.sql.catalyst.InternalRow row, scala.Function1<Object,Object>[] wrappers, Object[] cache, DataType[] dataTypes) |
| static Object | wrap(Object a, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector oi, DataType dataType) |
| static Object[] | wrap(scala.collection.Seq<Object> row, scala.Function1<Object,Object>[] wrappers, Object[] cache, DataType[] dataTypes) |
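Several of the methods above (setRequiredColumns, buildReader, and the Hive inspector helpers) are internal; their user-visible effect is column pruning and, optionally, predicate pushdown. A hedged sketch of how that surfaces through the public API (the path and object name are hypothetical; pushdown for this Hive-based reader is governed by spark.sql.orc.filterPushdown):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object OrcPruningSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("orc-pruning").master("local[*]").getOrCreate()

    // Hypothetical directory of ORC files.
    val events = spark.read.orc("/tmp/events.orc")

    // Selecting a subset of columns and filtering exercises the reader-side
    // machinery summarized above: only the requested columns are read, and
    // simple predicates may be pushed down when spark.sql.orc.filterPushdown
    // is enabled.
    events.select("id", "ts").filter(col("id") > 100).explain(true)

    spark.stop()
  }
}
```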
public static scala.collection.immutable.Map<String,String> extensionsForCompressionCodecNames()
public static scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow> unwrapOrcStructs(org.apache.hadoop.conf.Configuration conf, StructType dataSchema, StructType requiredSchema, scala.Option<org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector> maybeStructOI, scala.collection.Iterator<org.apache.hadoop.io.Writable> iterator)
public static void setRequiredColumns(org.apache.hadoop.conf.Configuration conf, StructType dataSchema, StructType requestedSchema)
public static void addSparkVersionMetadata(org.apache.hadoop.mapred.RecordWriter<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.Writable> recordWriter)
Add a metadata specifying Spark version.
Parameters: recordWriter - (undocumented)

public static DataType javaTypeToDataType(java.lang.reflect.Type clz)
public static scala.Function1<Object,Object> unwrapperFor(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector objectInspector)
public static scala.Function3<Object,org.apache.spark.sql.catalyst.InternalRow,Object,scala.runtime.BoxedUnit> unwrapperFor(org.apache.hadoop.hive.serde2.objectinspector.StructField field)
public static Object wrap(Object a, org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector oi, DataType dataType)
public static Object[] wrap(org.apache.spark.sql.catalyst.InternalRow row, scala.Function1<Object,Object>[] wrappers, Object[] cache, DataType[] dataTypes)
public static Object[] wrap(scala.collection.Seq<Object> row, scala.Function1<Object,Object>[] wrappers, Object[] cache, DataType[] dataTypes)
public static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector toInspector(DataType dataType)
public static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector toInspector(org.apache.spark.sql.catalyst.expressions.Expression expr)
public static DataType inspectorToDataType(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector inspector)
public static HiveInspectors.typeInfoConversions typeInfoConversions(DataType dt)
public static org.slf4j.Logger org$apache$spark$internal$Logging$$log_()
public static void org$apache$spark$internal$Logging$$log__$eq(org.slf4j.Logger x$1)
public String shortName()
The string that represents the format that this data source provider uses. The DataSourceRegister interface documents it with the example:
override def shortName(): String = "parquet"
Specified by: shortName in interface DataSourceRegister
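Not shown on this page, but the short name registered for this ORC source is "orc". A brief, hedged sketch (hypothetical paths, assuming an active SparkSession named spark):

```scala
// Refer to the source by its short name instead of the full class name.
val df = spark.read.format("orc").load("/tmp/people.orc")
df.write.format("orc").save("/tmp/people_copy.orc")
```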
public String toString()
Overrides: toString in class Object
public scala.Option<StructType> inferSchema(SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
Specified by: inferSchema in interface org.apache.spark.sql.execution.datasources.FileFormat
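As a small illustration (not from this page; the path is hypothetical and an active SparkSession named spark is assumed), schema inference is what lets ORC data be read without a user-supplied schema:

```scala
// The schema is inferred from the footers of the existing ORC files.
val events = spark.read.orc("/tmp/events.orc")
events.printSchema()
```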
public org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, StructType dataSchema)
Specified by: prepareWrite in interface org.apache.spark.sql.execution.datasources.FileFormat
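prepareWrite receives the write-side options map; a hedged sketch of how the most common one, the compression codec, is set from user code (assuming an active SparkSession, an existing DataFrame df, and a hypothetical output path):

```scala
// Pick the ORC compression codec through a write option; other accepted
// values include "none", "snappy" and "lzo".
df.write
  .option("compression", "zlib")
  .orc("/tmp/df_zlib.orc")
```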
public boolean isSplitable(SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.fs.Path path)
Specified by: isSplitable in interface org.apache.spark.sql.execution.datasources.FileFormat
public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(SparkSession sparkSession, StructType dataSchema, StructType partitionSchema, StructType requiredSchema, scala.collection.Seq<Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf)
Specified by: buildReader in interface org.apache.spark.sql.execution.datasources.FileFormat
public boolean supportDataType(DataType dataType)
Specified by: supportDataType in interface org.apache.spark.sql.execution.datasources.FileFormat