org.apache.spark.ml.Predictor<FeaturesType,Learner,M>

org.apache.spark.ml.regression.Regressor<Vector,DecisionTreeRegressor,DecisionTreeRegressionModel>

org.apache.spark.ml.regression.DecisionTreeRegressor

All Implemented Interfaces: Serializable, org.apache.spark.internal.Logging, Params, HasCheckpointInterval, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, HasVarianceCol, HasWeightCol, PredictorParams, DecisionTreeParams, DecisionTreeRegressorParams, HasVarianceImpurity, TreeRegressorParams, DefaultParamsWritable, Identifiable, MLWritable

public class DecisionTreeRegressor extends Regressor<Vector,DecisionTreeRegressor,DecisionTreeRegressionModel> implements DecisionTreeRegressorParams, DefaultParamsWritable

Decision tree learning algorithm for regression. It supports both continuous and categorical features.

See Also:

Serialized Form

Nested Class Summary

Nested classes/interfaces inherited from interface org.apache.spark.internal.Logging
org.apache.spark.internal.Logging.LogStringContext, org.apache.spark.internal.Logging.SparkShellLoggingFilter
Constructor Summary

Constructors

Constructor

Description

DecisionTreeRegressor()

DecisionTreeRegressor(String uid)
Method Summary

Modifier and Type

Method

Description

final BooleanParam

cacheNodeIds()

If false, the algorithm will pass trees to executors to match instances with nodes.

final IntParam

checkpointInterval()

Param for setting the checkpoint interval (>= 1) or disabling checkpointing (-1).

DecisionTreeRegressor

copy(ParamMap extra)

Creates a copy of this instance with the same UID and some extra params.

final Param<String>

impurity()

Criterion used for information gain calculation (case-insensitive).

final Param<String>

leafCol()

Leaf indices column name.

static DecisionTreeRegressor

load(String path)

final IntParam

maxBins()

Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node.

final IntParam

maxDepth()

Maximum depth of the tree (nonnegative).

final IntParam

maxMemoryInMB()

Maximum memory in MB allocated to histogram aggregation.

final DoubleParam

minInfoGain()

Minimum information gain for a split to be considered at a tree node.

final IntParam

minInstancesPerNode()

Minimum number of instances each child must have after split.

final DoubleParam

minWeightFractionPerNode()

Minimum fraction of the weighted sample count that each child must have after split.

static MLReader<DecisionTreeRegressor>

read()

final LongParam

seed()

Param for random seed.

DecisionTreeRegressor

setCacheNodeIds(boolean value)

DecisionTreeRegressor

setCheckpointInterval(int value)

Specifies how often to checkpoint the cached node IDs.

DecisionTreeRegressor

setImpurity(String value)

DecisionTreeRegressor

setMaxBins(int value)

DecisionTreeRegressor

setMaxDepth(int value)

DecisionTreeRegressor

setMaxMemoryInMB(int value)

DecisionTreeRegressor

setMinInfoGain(double value)

DecisionTreeRegressor

setMinInstancesPerNode(int value)

DecisionTreeRegressor

setMinWeightFractionPerNode(double value)

DecisionTreeRegressor

setSeed(long value)

DecisionTreeRegressor

setVarianceCol(String value)

DecisionTreeRegressor

setWeightCol(String value)

Sets the value of param weightCol().

static final String[]

supportedImpurities()

Accessor for supported impurities: variance

String

uid()

An immutable unique ID for the object and its derivatives.

final Param<String>

varianceCol()

Param for column name for the biased sample variance of prediction.

final Param<String>

weightCol()

Param for weight column name.

Methods inherited from class org.apache.spark.ml.Predictor
featuresCol, fit, labelCol, predictionCol, setFeaturesCol, setLabelCol, setPredictionCol, transformSchema

Methods inherited from class org.apache.spark.ml.Estimator
fit, fit, fit, fit

Methods inherited from class org.apache.spark.ml.PipelineStage
params

Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Methods inherited from interface org.apache.spark.ml.tree.DecisionTreeParams
getCacheNodeIds, getLeafCol, getMaxBins, getMaxDepth, getMaxMemoryInMB, getMinInfoGain, getMinInstancesPerNode, getMinWeightFractionPerNode, getOldStrategy, setLeafCol

Methods inherited from interface org.apache.spark.ml.tree.DecisionTreeRegressorParams
validateAndTransformSchema

Methods inherited from interface org.apache.spark.ml.util.DefaultParamsWritable
write

Methods inherited from interface org.apache.spark.ml.param.shared.HasCheckpointInterval
getCheckpointInterval

Methods inherited from interface org.apache.spark.ml.param.shared.HasFeaturesCol
featuresCol, getFeaturesCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasLabelCol
getLabelCol, labelCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasPredictionCol
getPredictionCol, predictionCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasSeed
getSeed

Methods inherited from interface org.apache.spark.ml.param.shared.HasVarianceCol
getVarianceCol

Methods inherited from interface org.apache.spark.ml.tree.HasVarianceImpurity
getImpurity, getOldImpurity

Methods inherited from interface org.apache.spark.ml.param.shared.HasWeightCol
getWeightCol

Methods inherited from interface org.apache.spark.ml.util.Identifiable
toString

Methods inherited from interface org.apache.spark.internal.Logging
initializeForcefully, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, isTraceEnabled, log, logDebug, logDebug, logDebug, logDebug, logError, logError, logError, logError, logInfo, logInfo, logInfo, logInfo, logName, LogStringContext, logTrace, logTrace, logTrace, logTrace, logWarning, logWarning, logWarning, logWarning, org$apache$spark$internal$Logging$$log_, org$apache$spark$internal$Logging$$log__$eq, withLogContext

Methods inherited from interface org.apache.spark.ml.util.MLWritable
save

Methods inherited from interface org.apache.spark.ml.param.Params
clear, copyValues, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, onParamChange, paramMap, params, set, set, set, setDefault, setDefault, shouldOwn

Constructor Details
- DecisionTreeRegressor
  
  public DecisionTreeRegressor(String uid)
- DecisionTreeRegressor
  
  public DecisionTreeRegressor()
Method Details
- supportedImpurities
  
  public static final String[] supportedImpurities()
  
  Accessor for supported impurities: variance
- load
  
  public static DecisionTreeRegressor load(String path)
- read
  
  public static MLReader<DecisionTreeRegressor> read()
- varianceCol
  
  public final Param<String> varianceCol()
  
  Description copied from interface: HasVarianceCol
  
  Param for column name for the biased sample variance of prediction.
  
  Specified by:
  
  varianceCol in interface HasVarianceCol
  
  Returns:
  
  (undocumented)
- impurity
  
  public final Param<String> impurity()
  
  Description copied from interface: HasVarianceImpurity
  
  Criterion used for information gain calculation (case-insensitive). This impurity type is used in DecisionTreeRegressor, RandomForestRegressor, GBTRegressor and GBTClassifier (since GBTClassificationModel is internally composed of DecisionTreeRegressionModels). Supported: "variance". (default = variance)
  
  Specified by:
  
  impurity in interface HasVarianceImpurity
  
  Returns:
  
  (undocumented)
- leafCol
  
  public final Param<String> leafCol()
  
  Description copied from interface: DecisionTreeParams
  
  Leaf indices column name. Predicted leaf index of each instance in each tree by preorder. (default = "")
  
  Specified by:
  
  leafCol in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- maxDepth
  
  public final IntParam maxDepth()
  
  Description copied from interface: DecisionTreeParams
  
  Maximum depth of the tree (nonnegative). E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. (default = 5)
  
  Specified by:
  
  maxDepth in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- maxBins
  
  public final IntParam maxBins()
  
  Description copied from interface: DecisionTreeParams
  
  Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node. More bins give higher granularity. Must be at least 2 and at least the number of categories in any categorical feature. (default = 32)
  
  Specified by:
  
  maxBins in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- minInstancesPerNode
  
  public final IntParam minInstancesPerNode()
  
  Description copied from interface: DecisionTreeParams
  
  Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Must be at least 1. (default = 1)
  
  Specified by:
  
  minInstancesPerNode in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- minWeightFractionPerNode
  
  public final DoubleParam minWeightFractionPerNode()
  
  Description copied from interface: DecisionTreeParams
  
  Minimum fraction of the weighted sample count that each child must have after split. If a split causes the fraction of the total weight in the left or right child to be less than minWeightFractionPerNode, the split will be discarded as invalid. Should be in the interval [0.0, 0.5). (default = 0.0)
  
  Specified by:
  
  minWeightFractionPerNode in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- minInfoGain
  
  public final DoubleParam minInfoGain()
  
  Description copied from interface: DecisionTreeParams
  
  Minimum information gain for a split to be considered at a tree node. Should be at least 0.0. (default = 0.0)
  
  Specified by:
  
  minInfoGain in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- maxMemoryInMB
  
  public final IntParam maxMemoryInMB()
  
  Description copied from interface: DecisionTreeParams
  
  Maximum memory in MB allocated to histogram aggregation. If too small, then 1 node will be split per iteration, and its aggregates may exceed this size. (default = 256 MB)
  
  Specified by:
  
  maxMemoryInMB in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- cacheNodeIds
  
  public final BooleanParam cacheNodeIds()
  
  Description copied from interface: DecisionTreeParams
  
  If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. Users can set how often the cache should be checkpointed, or disable checkpointing, by setting checkpointInterval. (default = false)
  
  Specified by:
  
  cacheNodeIds in interface DecisionTreeParams
  
  Returns:
  
  (undocumented)
- weightCol
  
  public final Param<String> weightCol()
  
  Description copied from interface: HasWeightCol
  
  Param for weight column name. If this is not set or empty, we treat all instance weights as 1.0.
  
  Specified by:
  
  weightCol in interface HasWeightCol
  
  Returns:
  
  (undocumented)
- seed
  
  public final LongParam seed()
  
  Description copied from interface: HasSeed
  
  Param for random seed.
  
  Specified by:
  
  seed in interface HasSeed
  
  Returns:
  
  (undocumented)
- checkpointInterval
  
  public final IntParam checkpointInterval()
  
  Description copied from interface: HasCheckpointInterval
  
  Param for setting the checkpoint interval (>= 1) or disabling checkpointing (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext.
  
  Specified by:
  
  checkpointInterval in interface HasCheckpointInterval
  
  Returns:
  
  (undocumented)
- uid
  
  public String uid()
  
  Description copied from interface: Identifiable
  
  An immutable unique ID for the object and its derivatives.
  
  Specified by:
  
  uid in interface Identifiable
  
  Returns:
  
  (undocumented)
- setMaxDepth
  
  public DecisionTreeRegressor setMaxDepth(int value)
- setMaxBins
  
  public DecisionTreeRegressor setMaxBins(int value)
- setMinInstancesPerNode
  
  public DecisionTreeRegressor setMinInstancesPerNode(int value)
- setMinWeightFractionPerNode
  
  public DecisionTreeRegressor setMinWeightFractionPerNode(double value)
- setMinInfoGain
  
  public DecisionTreeRegressor setMinInfoGain(double value)
- setMaxMemoryInMB
  
  public DecisionTreeRegressor setMaxMemoryInMB(int value)
- setCacheNodeIds
  
  public DecisionTreeRegressor setCacheNodeIds(boolean value)
- setCheckpointInterval
  
  public DecisionTreeRegressor setCheckpointInterval(int value)
  
  Specifies how often to checkpoint the cached node IDs. E.g. 10 means that the cache will get checkpointed every 10 iterations. This is only used if cacheNodeIds is true and if the checkpoint directory is set in SparkContext. Must be at least 1. (default = 10)
  
  Parameters:
  
  value - (undocumented)
  
  Returns:
  
  (undocumented)
- setImpurity
  
  public DecisionTreeRegressor setImpurity(String value)
- setSeed
  
  public DecisionTreeRegressor setSeed(long value)
- setVarianceCol
  
  public DecisionTreeRegressor setVarianceCol(String value)
- setWeightCol
  
  public DecisionTreeRegressor setWeightCol(String value)
  
  Sets the value of param weightCol(). If this is not set or empty, we treat all instance weights as 1.0. Default is not set, so all instances have weight one.
  
  Parameters:
  
  value - (undocumented)
  
  Returns:
  
  (undocumented)
- copy
  
  public DecisionTreeRegressor copy(ParamMap extra)
  
  Description copied from interface: Params
  
  Creates a copy of this instance with the same UID and some extra params. Subclasses should implement this method and set the return type properly. See defaultCopy().
  
  Specified by:
  
  copy in interface Params
  
  Specified by:
  
  copy in class Predictor<Vector,DecisionTreeRegressor,DecisionTreeRegressionModel>
  
  Parameters:
  
  extra - (undocumented)
  
  Returns:
  
  (undocumented)

Class DecisionTreeRegressor

Nested Class Summary

Nested classes/interfaces inherited from interface org.apache.spark.internal.Logging

Constructor Summary

Method Summary

Methods inherited from class org.apache.spark.ml.Predictor

Methods inherited from class org.apache.spark.ml.Estimator

Methods inherited from class org.apache.spark.ml.PipelineStage

Methods inherited from class java.lang.Object

Methods inherited from interface org.apache.spark.ml.tree.DecisionTreeParams

Methods inherited from interface org.apache.spark.ml.tree.DecisionTreeRegressorParams

Methods inherited from interface org.apache.spark.ml.util.DefaultParamsWritable

Methods inherited from interface org.apache.spark.ml.param.shared.HasCheckpointInterval

Methods inherited from interface org.apache.spark.ml.param.shared.HasFeaturesCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasLabelCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasPredictionCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasSeed

Methods inherited from interface org.apache.spark.ml.param.shared.HasVarianceCol

Methods inherited from interface org.apache.spark.ml.tree.HasVarianceImpurity

Methods inherited from interface org.apache.spark.ml.param.shared.HasWeightCol

Methods inherited from interface org.apache.spark.ml.util.Identifiable

Methods inherited from interface org.apache.spark.internal.Logging

Methods inherited from interface org.apache.spark.ml.util.MLWritable

Methods inherited from interface org.apache.spark.ml.param.Params

Constructor Details

DecisionTreeRegressor

DecisionTreeRegressor

Method Details

supportedImpurities

load

read

varianceCol

impurity

leafCol

maxDepth

maxBins

minInstancesPerNode

minWeightFractionPerNode

minInfoGain

maxMemoryInMB

cacheNodeIds

weightCol

seed

checkpointInterval

uid

setMaxDepth

setMaxBins

setMinInstancesPerNode

setMinWeightFractionPerNode

setMinInfoGain

setMaxMemoryInMB

setCacheNodeIds

setCheckpointInterval

setImpurity

setSeed

setVarianceCol

setWeightCol

copy