R: Compute histogram statistics for given column

histogram {SparkR}

R Documentation

Compute histogram statistics for given column

Description

This function computes a histogram for a given SparkR Column.

Usage

## S4 method for signature 'SparkDataFrame,characterOrColumn'
histogram(df, col, nbins = 10)

Arguments

`df`	the SparkDataFrame containing the Column to build the histogram from.
`col`	the column to build the histogram from, given either as a character string (the column name) or as a Column object.
`nbins`	the number of bins (optional). Default value is 10.

Value

a data.frame with the histogram statistics, i.e., counts and centroids.

Note

histogram since 2.0.0

See Also

Other SparkDataFrame functions: $, $,SparkDataFrame-method, $<-, $<-,SparkDataFrame-method, select, select, select,SparkDataFrame,Column-method, select,SparkDataFrame,character-method, select,SparkDataFrame,list-method; SparkDataFrame-class; [, [,SparkDataFrame-method, [[, [[,SparkDataFrame,numericOrcharacter-method, subset, subset, subset,SparkDataFrame-method; agg, agg, agg, agg,GroupedData-method, agg,SparkDataFrame-method, summarize, summarize, summarize, summarize,GroupedData-method, summarize,SparkDataFrame-method; arrange, arrange, arrange, arrange,SparkDataFrame,Column-method, arrange,SparkDataFrame,character-method, orderBy,SparkDataFrame,characterOrColumn-method; as.data.frame, as.data.frame,SparkDataFrame-method; attach, attach,SparkDataFrame-method; cache, cache, cache,SparkDataFrame-method; collect, collect, collect,SparkDataFrame-method; colnames, colnames, colnames,SparkDataFrame-method, colnames<-, colnames<-, colnames<-,SparkDataFrame-method, columns, columns, columns,SparkDataFrame-method, names, names,SparkDataFrame-method, names<-, names<-,SparkDataFrame-method; coltypes, coltypes, coltypes,SparkDataFrame-method, coltypes<-, coltypes<-, coltypes<-,SparkDataFrame,character-method; count,SparkDataFrame-method, nrow, nrow, nrow,SparkDataFrame-method; createOrReplaceTempView, createOrReplaceTempView, createOrReplaceTempView,SparkDataFrame,character-method; dapplyCollect, dapplyCollect, dapplyCollect,SparkDataFrame,function-method; dapply, dapply, dapply,SparkDataFrame,function,structType-method; describe, describe, describe, describe,SparkDataFrame,ANY-method, describe,SparkDataFrame,character-method, describe,SparkDataFrame-method, summary, summary, summary,SparkDataFrame-method; dim, dim,SparkDataFrame-method; distinct, distinct, distinct,SparkDataFrame-method, unique, unique,SparkDataFrame-method; dropDuplicates, dropDuplicates, dropDuplicates,SparkDataFrame-method; dropna, dropna, dropna,SparkDataFrame-method, fillna, fillna, 
fillna,SparkDataFrame-method, na.omit, na.omit, na.omit,SparkDataFrame-method; drop, drop, drop, drop,ANY-method, drop,SparkDataFrame-method; dtypes, dtypes, dtypes,SparkDataFrame-method; except, except, except,SparkDataFrame,SparkDataFrame-method; explain, explain, explain,SparkDataFrame-method; filter, filter, filter,SparkDataFrame,characterOrColumn-method, where, where, where,SparkDataFrame,characterOrColumn-method; first, first, first, first,SparkDataFrame-method, first,characterOrColumn-method; gapplyCollect, gapplyCollect, gapplyCollect, gapplyCollect,GroupedData-method, gapplyCollect,SparkDataFrame-method; gapply, gapply, gapply, gapply,GroupedData-method, gapply,SparkDataFrame-method; groupBy, groupBy, groupBy,SparkDataFrame-method, group_by, group_by, group_by,SparkDataFrame-method; head, head,SparkDataFrame-method; insertInto, insertInto, insertInto,SparkDataFrame,character-method; intersect, intersect, intersect,SparkDataFrame,SparkDataFrame-method; isLocal, isLocal, isLocal,SparkDataFrame-method; join, join,SparkDataFrame,SparkDataFrame-method; limit, limit, limit,SparkDataFrame,numeric-method; merge, merge, merge,SparkDataFrame,SparkDataFrame-method; mutate, mutate, mutate,SparkDataFrame-method, transform, transform, transform,SparkDataFrame-method; ncol, ncol,SparkDataFrame-method; persist, persist, persist,SparkDataFrame,character-method; printSchema, printSchema, printSchema,SparkDataFrame-method; randomSplit, randomSplit, randomSplit,SparkDataFrame,numeric-method; rbind, rbind, rbind,SparkDataFrame-method; registerTempTable, registerTempTable, registerTempTable,SparkDataFrame,character-method; rename, rename, rename,SparkDataFrame-method, withColumnRenamed, withColumnRenamed, withColumnRenamed,SparkDataFrame,character,character-method; repartition, repartition, repartition,SparkDataFrame-method; sample, sample, sample,SparkDataFrame,logical,numeric-method, sample_frac, sample_frac, sample_frac,SparkDataFrame,logical,numeric-method; 
saveAsParquetFile, saveAsParquetFile, saveAsParquetFile,SparkDataFrame,character-method, write.parquet, write.parquet, write.parquet,SparkDataFrame,character-method; saveAsTable, saveAsTable, saveAsTable,SparkDataFrame,character-method; saveDF, saveDF, saveDF,SparkDataFrame,character-method, write.df, write.df, write.df, write.df,SparkDataFrame,character-method; schema, schema, schema,SparkDataFrame-method; selectExpr, selectExpr, selectExpr,SparkDataFrame,character-method; showDF, showDF, showDF,SparkDataFrame-method; show, show, show,Column-method, show,GroupedData-method, show,SparkDataFrame-method, show,WindowSpec-method; str, str,SparkDataFrame-method; take, take, take,SparkDataFrame,numeric-method; union, union, union,SparkDataFrame,SparkDataFrame-method, unionAll, unionAll, unionAll,SparkDataFrame,SparkDataFrame-method; unpersist, unpersist, unpersist,SparkDataFrame-method; withColumn, withColumn, withColumn,SparkDataFrame,character,Column-method; with, with,SparkDataFrame-method; write.jdbc, write.jdbc, write.jdbc,SparkDataFrame,character,character-method; write.json, write.json, write.json,SparkDataFrame,character-method; write.orc, write.orc, write.orc,SparkDataFrame,character-method; write.text, write.text, write.text,SparkDataFrame,character-method

Examples

## Not run: 
##D 
##D # Create a SparkDataFrame from the Iris dataset
##D irisDF <- createDataFrame(iris)
##D 
##D # Compute histogram statistics
##D histStats <- histogram(irisDF, irisDF$Sepal_Length, nbins = 12)
##D 
##D # Once SparkR has computed the histogram statistics, the histogram can be
##D # rendered using the ggplot2 library:
##D 
##D require(ggplot2)
##D plot <- ggplot(histStats, aes(x = centroids, y = counts)) +
##D         geom_bar(stat = "identity") +
##D         xlab("Sepal_Length") + ylab("Frequency")
## End(Not run)

[Package SparkR version 2.0.1 Index]