public class Strategy
extends Object
implements scala.Serializable
org.apache.spark.mllib.tree.configuration.Algo.Classification
,
org.apache.spark.mllib.tree.configuration.Algo.Regression
param: impurity Criterion used for information gain calculation.
Supported for Classification: Gini, Entropy.
Supported for Regression: Variance.
param: maxDepth Maximum depth of the tree (e.g. depth 0 means 1 leaf node, depth 1 means
1 internal node + 2 leaf nodes).
param: numClasses Number of classes for classification.
(Ignored for regression.)
Default value is 2 (binary classification).
param: maxBins Maximum number of bins used for discretizing continuous features and
for choosing how to split on features at each node.
More bins give higher granularity.
param: quantileCalculationStrategy Algorithm for calculating quantiles. Supported:
org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort
param: categoricalFeaturesInfo A map storing information about the categorical variables and the
number of discrete values they take. An entry (n to k)
indicates that feature n is categorical with k categories
indexed from 0: {0, 1, ..., k-1}.
param: minInstancesPerNode Minimum number of instances each child must have after split.
Default value is 1. If a split causes the left or right child
to have fewer than minInstancesPerNode instances,
this split will not be considered as a valid split.
param: minInfoGain Minimum information gain a split must get. Default value is 0.0.
If a split has less information gain than minInfoGain,
this split will not be considered as a valid split.
param: maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. Default value is
256 MB. If too small, then 1 node will be split per iteration, and
its aggregates may exceed this size.
param: subsamplingRate Fraction of the training data used for learning the decision tree.
param: useNodeIdCache If this is true, instead of passing trees to executors, the algorithm will
maintain a separate RDD of node Id cache for each row.
param: checkpointInterval How often to checkpoint when the node Id cache gets updated.
E.g. 10 means that the cache will get checkpointed every 10 updates. If
the checkpoint directory is not set in SparkContext, this setting is ignored.
Constructor and Description |
---|
Strategy(scala.Enumeration.Value algo,
Impurity impurity,
int maxDepth,
int numClasses,
int maxBins,
scala.Enumeration.Value quantileCalculationStrategy,
scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo,
int minInstancesPerNode,
double minInfoGain,
int maxMemoryInMB,
double subsamplingRate,
boolean useNodeIdCache,
int checkpointInterval)
Backwards compatible constructor for
Strategy |
Strategy(scala.Enumeration.Value algo,
Impurity impurity,
int maxDepth,
int numClasses,
int maxBins,
scala.Enumeration.Value quantileCalculationStrategy,
scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo,
int minInstancesPerNode,
double minInfoGain,
int maxMemoryInMB,
double subsamplingRate,
boolean useNodeIdCache,
int checkpointInterval,
double minWeightFractionPerNode,
boolean bootstrap) |
Strategy(scala.Enumeration.Value algo,
Impurity impurity,
int maxDepth,
int numClasses,
int maxBins,
java.util.Map<Integer,Integer> categoricalFeaturesInfo)
Java-friendly constructor for
Strategy |
Modifier and Type | Method and Description |
---|---|
scala.Enumeration.Value |
algo() |
scala.collection.immutable.Map<Object,Object> |
categoricalFeaturesInfo() |
int |
checkpointInterval() |
Strategy |
copy()
Returns a shallow copy of this instance.
|
static Strategy |
defaultStrategy(scala.Enumeration.Value algo)
Construct a default set of parameters for
DecisionTree |
static Strategy |
defaultStrategy(String algo)
Construct a default set of parameters for
DecisionTree |
scala.Enumeration.Value |
getAlgo() |
scala.collection.immutable.Map<Object,Object> |
getCategoricalFeaturesInfo() |
int |
getCheckpointInterval() |
Impurity |
getImpurity() |
int |
getMaxBins() |
int |
getMaxDepth() |
int |
getMaxMemoryInMB() |
double |
getMinInfoGain() |
int |
getMinInstancesPerNode() |
double |
getMinWeightFractionPerNode() |
int |
getNumClasses() |
scala.Enumeration.Value |
getQuantileCalculationStrategy() |
double |
getSubsamplingRate() |
boolean |
getUseNodeIdCache() |
Impurity |
impurity() |
boolean |
isMulticlassClassification() |
boolean |
isMulticlassWithCategoricalFeatures() |
int |
maxBins() |
int |
maxDepth() |
int |
maxMemoryInMB() |
double |
minInfoGain() |
int |
minInstancesPerNode() |
double |
minWeightFractionPerNode() |
int |
numClasses() |
scala.Enumeration.Value |
quantileCalculationStrategy() |
void |
setAlgo(scala.Enumeration.Value x$1) |
void |
setAlgo(String algo)
Sets Algorithm using a String.
|
void |
setCategoricalFeaturesInfo(java.util.Map<Integer,Integer> categoricalFeaturesInfo)
Sets categoricalFeaturesInfo using a Java Map.
|
void |
setCategoricalFeaturesInfo(scala.collection.immutable.Map<Object,Object> x$1) |
void |
setCheckpointInterval(int x$1) |
void |
setImpurity(Impurity x$1) |
void |
setMaxBins(int x$1) |
void |
setMaxDepth(int x$1) |
void |
setMaxMemoryInMB(int x$1) |
void |
setMinInfoGain(double x$1) |
void |
setMinInstancesPerNode(int x$1) |
void |
setMinWeightFractionPerNode(double x$1) |
void |
setNumClasses(int x$1) |
void |
setQuantileCalculationStrategy(scala.Enumeration.Value x$1) |
void |
setSubsamplingRate(double x$1) |
void |
setUseNodeIdCache(boolean x$1) |
double |
subsamplingRate() |
boolean |
useNodeIdCache() |
public Strategy(scala.Enumeration.Value algo, Impurity impurity, int maxDepth, int numClasses, int maxBins, scala.Enumeration.Value quantileCalculationStrategy, scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo, int minInstancesPerNode, double minInfoGain, int maxMemoryInMB, double subsamplingRate, boolean useNodeIdCache, int checkpointInterval, double minWeightFractionPerNode, boolean bootstrap)
public Strategy(scala.Enumeration.Value algo, Impurity impurity, int maxDepth, int numClasses, int maxBins, scala.Enumeration.Value quantileCalculationStrategy, scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo, int minInstancesPerNode, double minInfoGain, int maxMemoryInMB, double subsamplingRate, boolean useNodeIdCache, int checkpointInterval)
Strategy
algo
- (undocumented)impurity
- (undocumented)maxDepth
- (undocumented)numClasses
- (undocumented)maxBins
- (undocumented)quantileCalculationStrategy
- (undocumented)categoricalFeaturesInfo
- (undocumented)minInstancesPerNode
- (undocumented)minInfoGain
- (undocumented)maxMemoryInMB
- (undocumented)subsamplingRate
- (undocumented)useNodeIdCache
- (undocumented)checkpointInterval
- (undocumented)
public Strategy(scala.Enumeration.Value algo, Impurity impurity, int maxDepth, int numClasses, int maxBins, java.util.Map<Integer,Integer> categoricalFeaturesInfo)
Strategy
algo
- (undocumented)impurity
- (undocumented)maxDepth
- (undocumented)numClasses
- (undocumented)maxBins
- (undocumented)categoricalFeaturesInfo
- (undocumented)
public static Strategy defaultStrategy(String algo)
DecisionTree
algo
- "Classification" or "Regression"
public static Strategy defaultStrategy(scala.Enumeration.Value algo)
DecisionTree
algo
- Algo.Classification or Algo.Regression
public scala.Enumeration.Value algo()
public Impurity impurity()
public int maxDepth()
public int numClasses()
public int maxBins()
public scala.Enumeration.Value quantileCalculationStrategy()
public scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo()
public int minInstancesPerNode()
public double minInfoGain()
public int maxMemoryInMB()
public double subsamplingRate()
public boolean useNodeIdCache()
public int checkpointInterval()
public double minWeightFractionPerNode()
public boolean isMulticlassClassification()
public boolean isMulticlassWithCategoricalFeatures()
public void setAlgo(String algo)
algo
- (undocumented)
public void setCategoricalFeaturesInfo(java.util.Map<Integer,Integer> categoricalFeaturesInfo)
categoricalFeaturesInfo
- (undocumented)
public Strategy copy()
public scala.Enumeration.Value getAlgo()
public scala.collection.immutable.Map<Object,Object> getCategoricalFeaturesInfo()
public int getCheckpointInterval()
public Impurity getImpurity()
public int getMaxBins()
public int getMaxDepth()
public int getMaxMemoryInMB()
public double getMinInfoGain()
public int getMinInstancesPerNode()
public double getMinWeightFractionPerNode()
public int getNumClasses()
public scala.Enumeration.Value getQuantileCalculationStrategy()
public double getSubsamplingRate()
public boolean getUseNodeIdCache()
public void setAlgo(scala.Enumeration.Value x$1)
public void setCategoricalFeaturesInfo(scala.collection.immutable.Map<Object,Object> x$1)
public void setCheckpointInterval(int x$1)
public void setImpurity(Impurity x$1)
public void setMaxBins(int x$1)
public void setMaxDepth(int x$1)
public void setMaxMemoryInMB(int x$1)
public void setMinInfoGain(double x$1)
public void setMinInstancesPerNode(int x$1)
public void setMinWeightFractionPerNode(double x$1)
public void setNumClasses(int x$1)
public void setQuantileCalculationStrategy(scala.Enumeration.Value x$1)
public void setSubsamplingRate(double x$1)
public void setUseNodeIdCache(boolean x$1)