class Strategy extends Serializable
Stores all the configuration options for tree construction
- Annotations
- @Since( "1.0.0" )
- Source
- Strategy.scala
- Alphabetic
- By Inheritance
- Strategy
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
Strategy(algo: Algo.Algo, impurity: Impurity, maxDepth: Int, numClasses: Int, maxBins: Int, categoricalFeaturesInfo: Map[Integer, Integer])
Java-friendly constructor for org.apache.spark.mllib.tree.configuration.Strategy
Java-friendly constructor for org.apache.spark.mllib.tree.configuration.Strategy
- Annotations
- @Since( "1.1.0" )
-
new
Strategy(algo: Algo.Algo, impurity: Impurity, maxDepth: Int, numClasses: Int, maxBins: Int, quantileCalculationStrategy: QuantileStrategy.QuantileStrategy, categoricalFeaturesInfo: Map[Int, Int], minInstancesPerNode: Int, minInfoGain: Double, maxMemoryInMB: Int, subsamplingRate: Double, useNodeIdCache: Boolean, checkpointInterval: Int)
Backwards compatible constructor for org.apache.spark.mllib.tree.configuration.Strategy
Backwards compatible constructor for org.apache.spark.mllib.tree.configuration.Strategy
- Annotations
- @Since( "1.0.0" )
-
new
Strategy(algo: Algo.Algo, impurity: Impurity, maxDepth: Int, numClasses: Int = 2, maxBins: Int = 32, quantileCalculationStrategy: QuantileStrategy.QuantileStrategy = Sort, categoricalFeaturesInfo: Map[Int, Int] = Map[Int, Int](), minInstancesPerNode: Int = 1, minInfoGain: Double = 0.0, maxMemoryInMB: Int = 256, subsamplingRate: Double = 1, useNodeIdCache: Boolean = false, checkpointInterval: Int = 10, minWeightFractionPerNode: Double = 0.0, bootstrap: Boolean = false)
- algo
Learning goal. Supported: org.apache.spark.mllib.tree.configuration.Algo.Classification, org.apache.spark.mllib.tree.configuration.Algo.Regression.
- impurity
Criterion used for information gain calculation. Supported for Classification: org.apache.spark.mllib.tree.impurity.Gini, org.apache.spark.mllib.tree.impurity.Entropy. Supported for Regression: org.apache.spark.mllib.tree.impurity.Variance.
- maxDepth
Maximum depth of the tree (e.g. depth 0 means 1 leaf node, depth 1 means 1 internal node + 2 leaf nodes).
- numClasses
Number of classes for classification. (Ignored for regression.) Default value is 2 (binary classification).
- maxBins
Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node. More bins give higher granularity.
- quantileCalculationStrategy
Algorithm for calculating quantiles. Supported: org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort.
- categoricalFeaturesInfo
A map storing information about the categorical variables and the number of discrete values they take. An entry (n to k) indicates that feature n is categorical with k categories indexed from 0: {0, 1, ..., k-1}.
- minInstancesPerNode
Minimum number of instances each child must have after a split. Default value is 1. If a split causes the left or right child to have fewer than minInstancesPerNode instances, this split will not be considered as a valid split.
- minInfoGain
Minimum information gain a split must get. Default value is 0.0. If a split has less information gain than minInfoGain, this split will not be considered as a valid split.
- maxMemoryInMB
Maximum memory in MB allocated to histogram aggregation. Default value is 256 MB. If too small, then 1 node will be split per iteration, and its aggregates may exceed this size.
- subsamplingRate
Fraction of the training data used for learning the decision tree.
- useNodeIdCache
If this is true, instead of passing trees to executors, the algorithm will maintain a separate RDD of node Id cache for each row.
- checkpointInterval
How often to checkpoint when the node Id cache gets updated. E.g. 10 means that the cache will get checkpointed every 10 updates. If the checkpoint directory is not set in org.apache.spark.SparkContext, this setting is ignored.
- Annotations
- @Since( "1.3.0" )
Value Members
-
var
algo: Algo.Algo
- Annotations
- @Since( "1.0.0" )
-
var
categoricalFeaturesInfo: Map[Int, Int]
- Annotations
- @Since( "1.0.0" )
-
var
checkpointInterval: Int
- Annotations
- @Since( "1.2.0" )
-
def
copy: Strategy
Returns a shallow copy of this instance.
Returns a shallow copy of this instance.
- Annotations
- @Since( "1.2.0" )
-
def
getAlgo(): Algo.Algo
- Annotations
- @Since( "1.0.0" )
-
def
getCategoricalFeaturesInfo(): Map[Int, Int]
- Annotations
- @Since( "1.0.0" )
-
def
getCheckpointInterval(): Int
- Annotations
- @Since( "1.2.0" )
-
def
getImpurity(): Impurity
- Annotations
- @Since( "1.0.0" )
-
def
getMaxBins(): Int
- Annotations
- @Since( "1.0.0" )
-
def
getMaxDepth(): Int
- Annotations
- @Since( "1.0.0" )
-
def
getMaxMemoryInMB(): Int
- Annotations
- @Since( "1.0.0" )
-
def
getMinInfoGain(): Double
- Annotations
- @Since( "1.2.0" )
-
def
getMinInstancesPerNode(): Int
- Annotations
- @Since( "1.2.0" )
-
def
getMinWeightFractionPerNode(): Double
- Annotations
- @Since( "3.0.0" )
-
def
getNumClasses(): Int
- Annotations
- @Since( "1.2.0" )
-
def
getQuantileCalculationStrategy(): QuantileStrategy.QuantileStrategy
- Annotations
- @Since( "1.0.0" )
-
def
getSubsamplingRate(): Double
- Annotations
- @Since( "1.2.0" )
-
def
getUseNodeIdCache(): Boolean
- Annotations
- @Since( "1.2.0" )
-
var
impurity: Impurity
- Annotations
- @Since( "1.0.0" )
-
def
isMulticlassClassification: Boolean
- Annotations
- @Since( "1.2.0" )
-
def
isMulticlassWithCategoricalFeatures: Boolean
- Annotations
- @Since( "1.2.0" )
-
var
maxBins: Int
- Annotations
- @Since( "1.0.0" )
-
var
maxDepth: Int
- Annotations
- @Since( "1.0.0" )
-
var
maxMemoryInMB: Int
- Annotations
- @Since( "1.0.0" )
-
var
minInfoGain: Double
- Annotations
- @Since( "1.2.0" )
-
var
minInstancesPerNode: Int
- Annotations
- @Since( "1.2.0" )
-
var
minWeightFractionPerNode: Double
- Annotations
- @Since( "3.0.0" )
-
var
numClasses: Int
- Annotations
- @Since( "1.2.0" )
-
var
quantileCalculationStrategy: QuantileStrategy.QuantileStrategy
- Annotations
- @Since( "1.0.0" )
-
def
setAlgo(algo: String): Unit
Sets Algorithm using a String.
Sets Algorithm using a String.
- Annotations
- @Since( "1.2.0" )
-
def
setAlgo(arg0: Algo.Algo): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setCategoricalFeaturesInfo(categoricalFeaturesInfo: Map[Integer, Integer]): Unit
Sets categoricalFeaturesInfo using a Java Map.
Sets categoricalFeaturesInfo using a Java Map.
- Annotations
- @Since( "1.2.0" )
-
def
setCategoricalFeaturesInfo(arg0: Map[Int, Int]): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setCheckpointInterval(arg0: Int): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setImpurity(arg0: Impurity): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMaxBins(arg0: Int): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMaxDepth(arg0: Int): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMaxMemoryInMB(arg0: Int): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMinInfoGain(arg0: Double): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setMinInstancesPerNode(arg0: Int): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setMinWeightFractionPerNode(arg0: Double): Unit
- Annotations
- @Since( "3.0.0" )
-
def
setNumClasses(arg0: Int): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setQuantileCalculationStrategy(arg0: QuantileStrategy.QuantileStrategy): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setSubsamplingRate(arg0: Double): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setUseNodeIdCache(arg0: Boolean): Unit
- Annotations
- @Since( "1.2.0" )
-
var
subsamplingRate: Double
- Annotations
- @Since( "1.2.0" )
-
var
useNodeIdCache: Boolean
- Annotations
- @Since( "1.2.0" )