org.apache.spark.ml.classification.LogisticRegressionModel

All Implemented Interfaces: Serializable, org.apache.spark.internal.Logging, ClassifierParams, LogisticRegressionParams, ProbabilisticClassifierParams, Params, HasAggregationDepth, HasElasticNetParam, HasFeaturesCol, HasFitIntercept, HasLabelCol, HasMaxBlockSizeInMB, HasMaxIter, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasRegParam, HasStandardization, HasThreshold, HasThresholds, HasTol, HasWeightCol, PredictorParams, HasTrainingSummary<LogisticRegressionTrainingSummary>, Identifiable, MLWritable

public class LogisticRegressionModel extends ProbabilisticClassificationModel<Vector,LogisticRegressionModel> implements MLWritable, LogisticRegressionParams, HasTrainingSummary<LogisticRegressionTrainingSummary>

Model produced by LogisticRegression.

See Also:

Serialized Form

Nested Class Summary

Nested Classes

Modifier and Type

Class

Description

static class

LogisticRegressionModel.Data$

Nested classes/interfaces inherited from interface org.apache.spark.internal.Logging
org.apache.spark.internal.Logging.LogStringContext, org.apache.spark.internal.Logging.SparkShellLoggingFilter
Method Summary

Modifier and Type

Method

Description

final IntParam

aggregationDepth()

Param for suggested depth for treeAggregate (>= 2).

BinaryLogisticRegressionTrainingSummary

binarySummary()

Gets summary of model on training set.

Matrix

coefficientMatrix()

Vector

coefficients()

A vector of model coefficients for "binomial" logistic regression.

LogisticRegressionModel

copy(ParamMap extra)

Creates a copy of this instance with the same UID and some extra params.

final DoubleParam

elasticNetParam()

Param for the ElasticNet mixing parameter, in range [0, 1].

LogisticRegressionSummary

evaluate(Dataset<?> dataset)

Evaluates the model on a test dataset.

final Param<String>

family()

Param for the name of family which is a description of the label distribution to be used in the model.

final BooleanParam

fitIntercept()

Param for whether to fit an intercept term.

double

getThreshold()

Get threshold for binary classification.

double[]

getThresholds()

Get thresholds for binary or multiclass classification.

double

intercept()

The model intercept for "binomial" logistic regression.

Vector

interceptVector()

static LogisticRegressionModel

load(String path)

Param<Matrix>

lowerBoundsOnCoefficients()

The lower bounds on coefficients if fitting under bound constrained optimization.

Param<Vector>

lowerBoundsOnIntercepts()

The lower bounds on intercepts if fitting under bound constrained optimization.

final DoubleParam

maxBlockSizeInMB()

Param for Maximum memory in MB for stacking input data into blocks.

final IntParam

maxIter()

Param for maximum number of iterations (>= 0).

int

numClasses()

Number of classes (values which the label can take).

int

numFeatures()

Returns the number of features the model was trained on.

double

predict(Vector features)

Predict label for the given feature vector.

Vector

predictRaw(Vector features)

Raw prediction for each possible label.

static MLReader<LogisticRegressionModel>

read()

final DoubleParam

regParam()

Param for regularization parameter (>= 0).

LogisticRegressionModel

setThreshold(double value)

Set threshold in binary classification, in range [0, 1].

LogisticRegressionModel

setThresholds(double[] value)

Set thresholds in multiclass (or binary) classification to adjust the probability of predicting each class.

final BooleanParam

standardization()

Param for whether to standardize the training features before fitting the model.

LogisticRegressionTrainingSummary

summary()

Gets summary of model on training set.

DoubleParam

threshold()

Param for threshold in binary classification prediction, in range [0, 1].

final DoubleParam

tol()

Param for the convergence tolerance for iterative algorithms (>= 0).

String

toString()

String

uid()

An immutable unique ID for the object and its derivatives.

Param<Matrix>

upperBoundsOnCoefficients()

The upper bounds on coefficients if fitting under bound constrained optimization.

Param<Vector>

upperBoundsOnIntercepts()

The upper bounds on intercepts if fitting under bound constrained optimization.

final Param<String>

weightCol()

Param for weight column name.

MLWriter

write()

Returns an MLWriter instance for this ML instance.

Methods inherited from class org.apache.spark.ml.classification.ProbabilisticClassificationModel
normalizeToProbabilitiesInPlace, predictProbability, probabilityCol, setProbabilityCol, thresholds, transform, transformSchema

Methods inherited from class org.apache.spark.ml.classification.ClassificationModel
rawPredictionCol, setRawPredictionCol, transformImpl

Methods inherited from class org.apache.spark.ml.PredictionModel
featuresCol, labelCol, predictionCol, setFeaturesCol, setPredictionCol

Methods inherited from class org.apache.spark.ml.Model
hasParent, parent, setParent

Methods inherited from class org.apache.spark.ml.Transformer
transform, transform, transform

Methods inherited from class org.apache.spark.ml.PipelineStage
params

Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait

Methods inherited from interface org.apache.spark.ml.param.shared.HasAggregationDepth
getAggregationDepth

Methods inherited from interface org.apache.spark.ml.param.shared.HasElasticNetParam
getElasticNetParam

Methods inherited from interface org.apache.spark.ml.param.shared.HasFeaturesCol
featuresCol, getFeaturesCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasFitIntercept
getFitIntercept

Methods inherited from interface org.apache.spark.ml.param.shared.HasLabelCol
getLabelCol, labelCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasMaxBlockSizeInMB
getMaxBlockSizeInMB

Methods inherited from interface org.apache.spark.ml.param.shared.HasMaxIter
getMaxIter

Methods inherited from interface org.apache.spark.ml.param.shared.HasPredictionCol
getPredictionCol, predictionCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasProbabilityCol
getProbabilityCol, probabilityCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasRawPredictionCol
getRawPredictionCol, rawPredictionCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasRegParam
getRegParam

Methods inherited from interface org.apache.spark.ml.param.shared.HasStandardization
getStandardization

Methods inherited from interface org.apache.spark.ml.param.shared.HasThresholds
thresholds

Methods inherited from interface org.apache.spark.ml.param.shared.HasTol
getTol

Methods inherited from interface org.apache.spark.ml.util.HasTrainingSummary
hasSummary, setSummary

Methods inherited from interface org.apache.spark.ml.param.shared.HasWeightCol
getWeightCol

Methods inherited from interface org.apache.spark.internal.Logging
initializeForcefully, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, isTraceEnabled, log, logBasedOnLevel, logDebug, logDebug, logDebug, logDebug, logError, logError, logError, logError, logInfo, logInfo, logInfo, logInfo, logName, LogStringContext, logTrace, logTrace, logTrace, logTrace, logWarning, logWarning, logWarning, logWarning, MDC, org$apache$spark$internal$Logging$$log_, org$apache$spark$internal$Logging$$log__$eq, withLogContext

Methods inherited from interface org.apache.spark.ml.classification.LogisticRegressionParams
checkThresholdConsistency, getFamily, getLowerBoundsOnCoefficients, getLowerBoundsOnIntercepts, getUpperBoundsOnCoefficients, getUpperBoundsOnIntercepts, usingBoundConstrainedOptimization, validateAndTransformSchema

Methods inherited from interface org.apache.spark.ml.util.MLWritable
save

Methods inherited from interface org.apache.spark.ml.param.Params
clear, copyValues, defaultCopy, defaultParamMap, estimateMatadataSize, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, paramMap, params, set, set, set, setDefault, setDefault, shouldOwn

Method Details
- read
  
  public static MLReader<LogisticRegressionModel> read()
- load
  
  public static LogisticRegressionModel load(String path)
- family
  
  public final Param<String> family()
  
  Description copied from interface: LogisticRegressionParams
  
  Param for the name of family which is a description of the label distribution to be used in the model. Supported options:
  - "auto": Automatically select the family based on the number of classes: if numClasses == 1 || numClasses == 2, set to "binomial"; otherwise, set to "multinomial".
  - "binomial": Binary logistic regression with pivoting.
  - "multinomial": Multinomial logistic (softmax) regression without pivoting.
  Default is "auto".
  
  Specified by:
  
  family in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- lowerBoundsOnCoefficients
  
  public Param<Matrix> lowerBoundsOnCoefficients()
  
  Description copied from interface: LogisticRegressionParams
  
  The lower bounds on coefficients if fitting under bound constrained optimization. The bound matrix must be compatible with the shape (1, number of features) for binomial regression, or (number of classes, number of features) for multinomial regression. Otherwise, an exception is thrown. Default is none.
  
  Specified by:
  
  lowerBoundsOnCoefficients in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- upperBoundsOnCoefficients
  
  public Param<Matrix> upperBoundsOnCoefficients()
  
  Description copied from interface: LogisticRegressionParams
  
  The upper bounds on coefficients if fitting under bound constrained optimization. The bound matrix must be compatible with the shape (1, number of features) for binomial regression, or (number of classes, number of features) for multinomial regression. Otherwise, an exception is thrown. Default is none.
  
  Specified by:
  
  upperBoundsOnCoefficients in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- lowerBoundsOnIntercepts
  
  public Param<Vector> lowerBoundsOnIntercepts()
  
  Description copied from interface: LogisticRegressionParams
  
  The lower bounds on intercepts if fitting under bound constrained optimization. The bound vector size must be equal to 1 for binomial regression, or the number of classes for multinomial regression. Otherwise, an exception is thrown. Default is none.
  
  Specified by:
  
  lowerBoundsOnIntercepts in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- upperBoundsOnIntercepts
  
  public Param<Vector> upperBoundsOnIntercepts()
  
  Description copied from interface: LogisticRegressionParams
  
  The upper bounds on intercepts if fitting under bound constrained optimization. The bound vector size must be equal to 1 for binomial regression, or the number of classes for multinomial regression. Otherwise, an exception is thrown. Default is none.
  
  Specified by:
  
  upperBoundsOnIntercepts in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- maxBlockSizeInMB
  
  public final DoubleParam maxBlockSizeInMB()
  
  Description copied from interface: HasMaxBlockSizeInMB
  
  Param for maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If this value is larger than the remaining data size in a partition, it is adjusted to the data size. The default of 0.0 means an optimal value is chosen, depending on the specific algorithm. Must be >= 0.
  
  Specified by:
  
  maxBlockSizeInMB in interface HasMaxBlockSizeInMB
  
  Returns:
  
  (undocumented)
- aggregationDepth
  
  public final IntParam aggregationDepth()
  
  Description copied from interface: HasAggregationDepth
  
  Param for suggested depth for treeAggregate (>= 2).
  
  Specified by:
  
  aggregationDepth in interface HasAggregationDepth
  
  Returns:
  
  (undocumented)
- threshold
  
  public DoubleParam threshold()
  
  Description copied from interface: HasThreshold
  
  Param for threshold in binary classification prediction, in range [0, 1].
  
  Specified by:
  
  threshold in interface HasThreshold
  
  Returns:
  
  (undocumented)
- weightCol
  
  public final Param<String> weightCol()
  
  Description copied from interface: HasWeightCol
  
  Param for weight column name. If this is not set or empty, we treat all instance weights as 1.0.
  
  Specified by:
  
  weightCol in interface HasWeightCol
  
  Returns:
  
  (undocumented)
- standardization
  
  public final BooleanParam standardization()
  
  Description copied from interface: HasStandardization
  
  Param for whether to standardize the training features before fitting the model.
  
  Specified by:
  
  standardization in interface HasStandardization
  
  Returns:
  
  (undocumented)
- tol
  
  public final DoubleParam tol()
  
  Description copied from interface: HasTol
  
  Param for the convergence tolerance for iterative algorithms (>= 0).
  
  Specified by:
  
  tol in interface HasTol
  
  Returns:
  
  (undocumented)
- fitIntercept
  
  public final BooleanParam fitIntercept()
  
  Description copied from interface: HasFitIntercept
  
  Param for whether to fit an intercept term.
  
  Specified by:
  
  fitIntercept in interface HasFitIntercept
  
  Returns:
  
  (undocumented)
- maxIter
  
  public final IntParam maxIter()
  
  Description copied from interface: HasMaxIter
  
  Param for maximum number of iterations (>= 0).
  
  Specified by:
  
  maxIter in interface HasMaxIter
  
  Returns:
  
  (undocumented)
- elasticNetParam
  
  public final DoubleParam elasticNetParam()
  
  Description copied from interface: HasElasticNetParam
  
  Param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.
  
  Specified by:
  
  elasticNetParam in interface HasElasticNetParam
  
  Returns:
  
  (undocumented)
- regParam
  
  public final DoubleParam regParam()
  
  Description copied from interface: HasRegParam
  
  Param for regularization parameter (>= 0).
  
  Specified by:
  
  regParam in interface HasRegParam
  
  Returns:
  
  (undocumented)
- uid
  
  public String uid()
  
  Description copied from interface: Identifiable
  
  An immutable unique ID for the object and its derivatives.
  
  Specified by:
  
  uid in interface Identifiable
  
  Returns:
  
  (undocumented)
- coefficientMatrix
  
  public Matrix coefficientMatrix()
- interceptVector
  
  public Vector interceptVector()
- numClasses
  
  public int numClasses()
  
  Description copied from class: ClassificationModel
  
  Number of classes (values which the label can take).
  
  Specified by:
  
  numClasses in class ClassificationModel<Vector,LogisticRegressionModel>
- coefficients
  
  public Vector coefficients()
  
  A vector of model coefficients for "binomial" logistic regression. If this model was trained using the "multinomial" family then an exception is thrown.
  
  Returns:
  
  Vector
- intercept
  
  public double intercept()
  
  The model intercept for "binomial" logistic regression. If this model was fit with the "multinomial" family then an exception is thrown.
  
  Returns:
  
  Double
- setThreshold
  
  public LogisticRegressionModel setThreshold(double value)
  
  Description copied from interface: LogisticRegressionParams
  
  Set threshold in binary classification, in range [0, 1].
  If the estimated probability of class label 1 is greater than threshold, then predict 1, else 0. A high threshold encourages the model to predict 0 more often; a low threshold encourages the model to predict 1 more often.
  Note: Calling this with threshold p is equivalent to calling setThresholds(Array(1-p, p)). When setThreshold() is called, any user-set value for thresholds will be cleared. If both threshold and thresholds are set in a ParamMap, then they must be equivalent.
  Default is 0.5.
  
  Specified by:
  
  setThreshold in interface LogisticRegressionParams
  
  Parameters:
  
  value - (undocumented)
  
  Returns:
  
  (undocumented)
- getThreshold
  
  public double getThreshold()
  
  Description copied from interface: LogisticRegressionParams
  Get threshold for binary classification.
  If thresholds is set with length 2 (i.e., binary classification), this returns the equivalent threshold: 1 / (1 + thresholds(0) / thresholds(1)). Otherwise, returns threshold if set, or its default value if unset.
  Throws IllegalArgumentException if thresholds is set to an array of length other than 2.
  Specified by:
  
  getThreshold in interface HasThreshold
  
  Specified by:
  
  getThreshold in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- setThresholds
  
  public LogisticRegressionModel setThresholds(double[] value)
  
  Description copied from interface: LogisticRegressionParams
  
  Set thresholds in multiclass (or binary) classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values greater than 0, excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold.
  Note: When setThresholds() is called, any user-set value for threshold will be cleared. If both threshold and thresholds are set in a ParamMap, then they must be equivalent.
  
  Specified by:
  
  setThresholds in interface LogisticRegressionParams
  
  Overrides:
  
  setThresholds in class ProbabilisticClassificationModel<Vector,LogisticRegressionModel>
  
  Parameters:
  
  value - (undocumented)
  
  Returns:
  
  (undocumented)
- getThresholds
  
  public double[] getThresholds()
  
  Description copied from interface: LogisticRegressionParams
  
  Get thresholds for binary or multiclass classification.
  If thresholds is set, return its value. Otherwise, if threshold is set, return the equivalent thresholds for binary classification: (1-threshold, threshold). If neither is set, an exception is thrown.
  
  Specified by:
  
  getThresholds in interface HasThresholds
  
  Specified by:
  
  getThresholds in interface LogisticRegressionParams
  
  Returns:
  
  (undocumented)
- numFeatures
  
  public int numFeatures()
  
  Description copied from class: PredictionModel
  
  Returns the number of features the model was trained on. If unknown, returns -1.
  
  Overrides:
  
  numFeatures in class PredictionModel<Vector,LogisticRegressionModel>
- summary
  
  public LogisticRegressionTrainingSummary summary()
  
  Gets summary of model on training set. An exception is thrown if hasSummary is false.
  
  Specified by:
  
  summary in interface HasTrainingSummary<LogisticRegressionTrainingSummary>
  
  Returns:
  
  (undocumented)
- binarySummary
  
  public BinaryLogisticRegressionTrainingSummary binarySummary()
  
  Gets summary of model on training set. An exception is thrown if hasSummary is false or it is a multiclass model.
  
  Returns:
  
  (undocumented)
- evaluate
  
  public LogisticRegressionSummary evaluate(Dataset<?> dataset)
  
  Evaluates the model on a test dataset.
  
  Parameters:
  
  dataset - Test dataset to evaluate model on.
  
  Returns:
  
  (undocumented)
- predict
  
  public double predict(Vector features)
  
  Predict label for the given feature vector. The behavior of this can be adjusted using thresholds.
  
  Overrides:
  
  predict in class ClassificationModel<Vector,LogisticRegressionModel>
  
  Parameters:
  
  features - (undocumented)
  
  Returns:
  
  (undocumented)
- predictRaw
  
  public Vector predictRaw(Vector features)
  
  Description copied from class: ClassificationModel
  
  Raw prediction for each possible label. The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives a measure of confidence in each possible label (where larger = more confident). This internal method is used to implement transform() and output ClassificationModel.rawPredictionCol().
  
  Specified by:
  
  predictRaw in class ClassificationModel<Vector,LogisticRegressionModel>
  
  Parameters:
  
  features - (undocumented)
  
  Returns:
  
  vector where element i is the raw prediction for label i. This raw prediction may be any real number, where a larger value indicates greater confidence for that label.
- copy
  
  public LogisticRegressionModel copy(ParamMap extra)
  
  Description copied from interface: Params
  
  Creates a copy of this instance with the same UID and some extra params. Subclasses should implement this method and set the return type properly. See defaultCopy().
  
  Specified by:
  
  copy in interface Params
  
  Specified by:
  
  copy in class Model<LogisticRegressionModel>
  
  Parameters:
  
  extra - (undocumented)
  
  Returns:
  
  (undocumented)
- write
  
  public MLWriter write()
  
  Returns an MLWriter instance for this ML instance.
  For LogisticRegressionModel, this does NOT currently save the training summary(). An option to save summary() may be added in the future.
  This also does not save the Model.parent() currently.
  
  Specified by:
  
  write in interface MLWritable
  
  Returns:
  
  (undocumented)
- toString
  
  public String toString()
  
  Specified by:
  
  toString in interface Identifiable
  
  Overrides:
  
  toString in class Object

Class LogisticRegressionModel

Nested Class Summary

Nested classes/interfaces inherited from interface org.apache.spark.internal.Logging

Method Summary

Methods inherited from class org.apache.spark.ml.classification.ProbabilisticClassificationModel

Methods inherited from class org.apache.spark.ml.classification.ClassificationModel

Methods inherited from class org.apache.spark.ml.PredictionModel

Methods inherited from class org.apache.spark.ml.Model

Methods inherited from class org.apache.spark.ml.Transformer

Methods inherited from class org.apache.spark.ml.PipelineStage

Methods inherited from class java.lang.Object

Methods inherited from interface org.apache.spark.ml.param.shared.HasAggregationDepth

Methods inherited from interface org.apache.spark.ml.param.shared.HasElasticNetParam

Methods inherited from interface org.apache.spark.ml.param.shared.HasFeaturesCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasFitIntercept

Methods inherited from interface org.apache.spark.ml.param.shared.HasLabelCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasMaxBlockSizeInMB

Methods inherited from interface org.apache.spark.ml.param.shared.HasMaxIter

Methods inherited from interface org.apache.spark.ml.param.shared.HasPredictionCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasProbabilityCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasRawPredictionCol

Methods inherited from interface org.apache.spark.ml.param.shared.HasRegParam

Methods inherited from interface org.apache.spark.ml.param.shared.HasStandardization

Methods inherited from interface org.apache.spark.ml.param.shared.HasThresholds

Methods inherited from interface org.apache.spark.ml.param.shared.HasTol

Methods inherited from interface org.apache.spark.ml.util.HasTrainingSummary

Methods inherited from interface org.apache.spark.ml.param.shared.HasWeightCol

Methods inherited from interface org.apache.spark.internal.Logging

Methods inherited from interface org.apache.spark.ml.classification.LogisticRegressionParams

Methods inherited from interface org.apache.spark.ml.util.MLWritable

Methods inherited from interface org.apache.spark.ml.param.Params

Method Details

read

load

family

lowerBoundsOnCoefficients

upperBoundsOnCoefficients

lowerBoundsOnIntercepts

upperBoundsOnIntercepts

maxBlockSizeInMB

aggregationDepth

threshold

weightCol

standardization

tol

fitIntercept

maxIter

elasticNetParam

regParam

uid

coefficientMatrix

interceptVector

numClasses

coefficients

intercept

setThreshold

getThreshold

setThresholds

getThresholds

numFeatures

summary

binarySummary

evaluate

predict

predictRaw

copy

write

toString