class Strategy extends Serializable
Stores all the configuration options for tree construction
- Annotations
- @Since( "1.0.0" )
- Source
- Strategy.scala
- Alphabetic
- By Inheritance
- Strategy
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
Strategy(algo: Algo.Algo, impurity: Impurity, maxDepth: Int, numClasses: Int, maxBins: Int, categoricalFeaturesInfo: Map[Integer, Integer])
Java-friendly constructor for org.apache.spark.mllib.tree.configuration.Strategy
Java-friendly constructor for org.apache.spark.mllib.tree.configuration.Strategy
- Annotations
- @Since( "1.1.0" )
-
new
Strategy(algo: Algo.Algo, impurity: Impurity, maxDepth: Int, numClasses: Int, maxBins: Int, quantileCalculationStrategy: QuantileStrategy.QuantileStrategy, categoricalFeaturesInfo: Map[Int, Int], minInstancesPerNode: Int, minInfoGain: Double, maxMemoryInMB: Int, subsamplingRate: Double, useNodeIdCache: Boolean, checkpointInterval: Int)
Backwards compatible constructor for org.apache.spark.mllib.tree.configuration.Strategy
Backwards compatible constructor for org.apache.spark.mllib.tree.configuration.Strategy
- Annotations
- @Since( "1.0.0" )
-
new
Strategy(algo: Algo.Algo, impurity: Impurity, maxDepth: Int, numClasses: Int = 2, maxBins: Int = 32, quantileCalculationStrategy: QuantileStrategy.QuantileStrategy = Sort, categoricalFeaturesInfo: Map[Int, Int] = Map[Int, Int](), minInstancesPerNode: Int = 1, minInfoGain: Double = 0.0, maxMemoryInMB: Int = 256, subsamplingRate: Double = 1, useNodeIdCache: Boolean = false, checkpointInterval: Int = 10, minWeightFractionPerNode: Double = 0.0, bootstrap: Boolean = false)
- algo
Learning goal. Supported: org.apache.spark.mllib.tree.configuration.Algo.Classification, org.apache.spark.mllib.tree.configuration.Algo.Regression.
- impurity
Criterion used for information gain calculation. Supported for Classification: org.apache.spark.mllib.tree.impurity.Gini, org.apache.spark.mllib.tree.impurity.Entropy. Supported for Regression: org.apache.spark.mllib.tree.impurity.Variance.
- maxDepth
Maximum depth of the tree (e.g. depth 0 means 1 leaf node, depth 1 means 1 internal node + 2 leaf nodes).
- numClasses
Number of classes for classification. (Ignored for regression.) Default value is 2 (binary classification).
- maxBins
Maximum number of bins used for discretizing continuous features and for choosing how to split on features at each node. More bins give higher granularity.
- quantileCalculationStrategy
Algorithm for calculating quantiles. Supported: org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort.
- categoricalFeaturesInfo
A map storing information about the categorical variables and the number of discrete values they take. An entry (n to k) indicates that feature n is categorical with k categories indexed from 0: {0, 1, ..., k-1}.
- minInstancesPerNode
Minimum number of instances each child must have after a split. Default value is 1. If a split causes the left or right child to have fewer than minInstancesPerNode instances, the split will not be considered a valid split.
- minInfoGain
Minimum information gain a split must get. Default value is 0.0. If a split has less information gain than minInfoGain, this split will not be considered as a valid split.
- maxMemoryInMB
Maximum memory in MB allocated to histogram aggregation. Default value is 256 MB. If too small, then 1 node will be split per iteration, and its aggregates may exceed this size.
- subsamplingRate
Fraction of the training data used for learning the decision tree.
- useNodeIdCache
If this is true, instead of passing trees to executors, the algorithm will maintain a separate RDD that caches the node Id for each row.
- checkpointInterval
How often to checkpoint when the node Id cache gets updated. E.g. 10 means that the cache will get checkpointed every 10 updates. If the checkpoint directory is not set in org.apache.spark.SparkContext, this setting is ignored.
- Annotations
- @Since( "1.3.0" )
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
var
algo: Algo.Algo
- Annotations
- @Since( "1.0.0" )
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
var
categoricalFeaturesInfo: Map[Int, Int]
- Annotations
- @Since( "1.0.0" )
-
var
checkpointInterval: Int
- Annotations
- @Since( "1.2.0" )
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
copy: Strategy
Returns a shallow copy of this instance.
Returns a shallow copy of this instance.
- Annotations
- @Since( "1.2.0" )
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
getAlgo(): Algo.Algo
- Annotations
- @Since( "1.0.0" )
-
def
getCategoricalFeaturesInfo(): Map[Int, Int]
- Annotations
- @Since( "1.0.0" )
-
def
getCheckpointInterval(): Int
- Annotations
- @Since( "1.2.0" )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getImpurity(): Impurity
- Annotations
- @Since( "1.0.0" )
-
def
getMaxBins(): Int
- Annotations
- @Since( "1.0.0" )
-
def
getMaxDepth(): Int
- Annotations
- @Since( "1.0.0" )
-
def
getMaxMemoryInMB(): Int
- Annotations
- @Since( "1.0.0" )
-
def
getMinInfoGain(): Double
- Annotations
- @Since( "1.2.0" )
-
def
getMinInstancesPerNode(): Int
- Annotations
- @Since( "1.2.0" )
-
def
getMinWeightFractionPerNode(): Double
- Annotations
- @Since( "3.0.0" )
-
def
getNumClasses(): Int
- Annotations
- @Since( "1.2.0" )
-
def
getQuantileCalculationStrategy(): QuantileStrategy.QuantileStrategy
- Annotations
- @Since( "1.0.0" )
-
def
getSubsamplingRate(): Double
- Annotations
- @Since( "1.2.0" )
-
def
getUseNodeIdCache(): Boolean
- Annotations
- @Since( "1.2.0" )
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
var
impurity: Impurity
- Annotations
- @Since( "1.0.0" )
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isMulticlassClassification: Boolean
- Annotations
- @Since( "1.2.0" )
-
def
isMulticlassWithCategoricalFeatures: Boolean
- Annotations
- @Since( "1.2.0" )
-
var
maxBins: Int
- Annotations
- @Since( "1.0.0" )
-
var
maxDepth: Int
- Annotations
- @Since( "1.0.0" )
-
var
maxMemoryInMB: Int
- Annotations
- @Since( "1.0.0" )
-
var
minInfoGain: Double
- Annotations
- @Since( "1.2.0" )
-
var
minInstancesPerNode: Int
- Annotations
- @Since( "1.2.0" )
-
var
minWeightFractionPerNode: Double
- Annotations
- @Since( "3.0.0" )
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
var
numClasses: Int
- Annotations
- @Since( "1.2.0" )
-
var
quantileCalculationStrategy: QuantileStrategy.QuantileStrategy
- Annotations
- @Since( "1.0.0" )
-
def
setAlgo(algo: String): Unit
Sets Algorithm using a String.
Sets Algorithm using a String.
- Annotations
- @Since( "1.2.0" )
-
def
setAlgo(arg0: Algo.Algo): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setCategoricalFeaturesInfo(categoricalFeaturesInfo: Map[Integer, Integer]): Unit
Sets categoricalFeaturesInfo using a Java Map.
Sets categoricalFeaturesInfo using a Java Map.
- Annotations
- @Since( "1.2.0" )
-
def
setCategoricalFeaturesInfo(arg0: Map[Int, Int]): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setCheckpointInterval(arg0: Int): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setImpurity(arg0: Impurity): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMaxBins(arg0: Int): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMaxDepth(arg0: Int): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMaxMemoryInMB(arg0: Int): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setMinInfoGain(arg0: Double): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setMinInstancesPerNode(arg0: Int): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setMinWeightFractionPerNode(arg0: Double): Unit
- Annotations
- @Since( "3.0.0" )
-
def
setNumClasses(arg0: Int): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setQuantileCalculationStrategy(arg0: QuantileStrategy.QuantileStrategy): Unit
- Annotations
- @Since( "1.0.0" )
-
def
setSubsamplingRate(arg0: Double): Unit
- Annotations
- @Since( "1.2.0" )
-
def
setUseNodeIdCache(arg0: Boolean): Unit
- Annotations
- @Since( "1.2.0" )
-
var
subsamplingRate: Double
- Annotations
- @Since( "1.2.0" )
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
var
useNodeIdCache: Boolean
- Annotations
- @Since( "1.2.0" )
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()