org.apache.spark.ml.classification
Class RandomForestClassifier

Object
  extended by org.apache.spark.ml.PipelineStage
      extended by org.apache.spark.ml.Estimator<M>
          extended by org.apache.spark.ml.Predictor<Vector,RandomForestClassifier,RandomForestClassificationModel>
              extended by org.apache.spark.ml.classification.RandomForestClassifier
All Implemented Interfaces:
java.io.Serializable, Logging, Params

public final class RandomForestClassifier
extends Predictor<Vector,RandomForestClassifier,RandomForestClassificationModel>

:: Experimental :: Random Forest learning algorithm for classification. It supports both binary and multiclass labels, as well as both continuous and categorical features.

See Also:
Serialized Form

Constructor Summary
RandomForestClassifier()
           
RandomForestClassifier(String uid)
           
 
Method Summary
 RandomForestClassifier copy(ParamMap extra)
          Creates a copy of this instance with the same UID and some extra params.
 RandomForestClassifier setCacheNodeIds(boolean value)
           
 RandomForestClassifier setCheckpointInterval(int value)
           
 RandomForestClassifier setFeatureSubsetStrategy(String value)
           
 RandomForestClassifier setImpurity(String value)
           
 RandomForestClassifier setMaxBins(int value)
           
 RandomForestClassifier setMaxDepth(int value)
           
 RandomForestClassifier setMaxMemoryInMB(int value)
           
 RandomForestClassifier setMinInfoGain(double value)
           
 RandomForestClassifier setMinInstancesPerNode(int value)
           
 RandomForestClassifier setNumTrees(int value)
           
 RandomForestClassifier setSeed(long value)
           
 RandomForestClassifier setSubsamplingRate(double value)
           
static String[] supportedFeatureSubsetStrategies()
          Accessor for supported featureSubsetStrategy settings: auto, all, onethird, sqrt, log2
static String[] supportedImpurities()
          Accessor for supported impurity settings: entropy, gini
 String uid()
           
 StructType validateAndTransformSchema(StructType schema, boolean fitting, DataType featuresDataType)
          Validates and transforms the input schema with the provided param map.
 
Methods inherited from class org.apache.spark.ml.Predictor
fit, setFeaturesCol, setLabelCol, setPredictionCol, transformSchema
 
Methods inherited from class org.apache.spark.ml.Estimator
fit, fit, fit, fit
 
Methods inherited from class Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.spark.ml.param.Params
clear, copyValues, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, paramMap, params, set, set, set, setDefault, setDefault, setDefault, shouldOwn, validateParams
 
Methods inherited from interface org.apache.spark.Logging
initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
 

Constructor Detail

RandomForestClassifier

public RandomForestClassifier(String uid)

RandomForestClassifier

public RandomForestClassifier()
Method Detail

supportedImpurities

public static final String[] supportedImpurities()
Accessor for supported impurity settings: entropy, gini


supportedFeatureSubsetStrategies

public static final String[] supportedFeatureSubsetStrategies()
Accessor for supported featureSubsetStrategy settings: auto, all, onethird, sqrt, log2


uid

public String uid()

setMaxDepth

public RandomForestClassifier setMaxDepth(int value)

setMaxBins

public RandomForestClassifier setMaxBins(int value)

setMinInstancesPerNode

public RandomForestClassifier setMinInstancesPerNode(int value)

setMinInfoGain

public RandomForestClassifier setMinInfoGain(double value)

setMaxMemoryInMB

public RandomForestClassifier setMaxMemoryInMB(int value)

setCacheNodeIds

public RandomForestClassifier setCacheNodeIds(boolean value)

setCheckpointInterval

public RandomForestClassifier setCheckpointInterval(int value)

setImpurity

public RandomForestClassifier setImpurity(String value)

setSubsamplingRate

public RandomForestClassifier setSubsamplingRate(double value)

setSeed

public RandomForestClassifier setSeed(long value)

setNumTrees

public RandomForestClassifier setNumTrees(int value)

setFeatureSubsetStrategy

public RandomForestClassifier setFeatureSubsetStrategy(String value)

copy

public RandomForestClassifier copy(ParamMap extra)
Description copied from interface: Params
Creates a copy of this instance with the same UID and some extra params. Subclasses should implement this method and set the return type properly.

Specified by:
copy in interface Params
Specified by:
copy in class Predictor<Vector,RandomForestClassifier,RandomForestClassificationModel>
Parameters:
extra - extra params to apply to the copy
Returns:
a copy of this instance with the same UID and the extra params applied
See Also:
defaultCopy()

validateAndTransformSchema

public StructType validateAndTransformSchema(StructType schema,
                                             boolean fitting,
                                             DataType featuresDataType)
Validates and transforms the input schema with the provided param map.

Parameters:
schema - input schema
fitting - whether this method is being called during fitting
featuresDataType - SQL DataType for FeaturesType. E.g., VectorUDT for vector features.
Returns:
output schema