org.apache.spark.mllib.random
Class RandomRDDs

Object
  extended by org.apache.spark.mllib.random.RandomRDDs

public class RandomRDDs
extends Object

:: Experimental :: Generator methods for creating RDDs comprised of i.i.d. samples from some distribution.


Constructor Summary
RandomRDDs()
           
 
Method Summary
static JavaDoubleRDD exponentialJavaRDD(JavaSparkContext jsc, double mean, long size)
          exponentialJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default number of partitions and the default seed.
static JavaDoubleRDD exponentialJavaRDD(JavaSparkContext jsc, double mean, long size, int numPartitions)
          exponentialJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default seed.
static JavaDoubleRDD exponentialJavaRDD(JavaSparkContext jsc, double mean, long size, int numPartitions, long seed)
          Java-friendly version of exponentialRDD(org.apache.spark.SparkContext, double, long, int, long).
static JavaRDD<Vector> exponentialJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols)
          exponentialJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default number of partitions and the default seed.
static JavaRDD<Vector> exponentialJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols, int numPartitions)
          exponentialJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default seed.
static JavaRDD<Vector> exponentialJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols, int numPartitions, long seed)
          Java-friendly version of exponentialVectorRDD(org.apache.spark.SparkContext, double, long, int, int, long).
static RDD<Object> exponentialRDD(SparkContext sc, double mean, long size, int numPartitions, long seed)
          Generates an RDD comprised of i.i.d. samples from the exponential distribution with the input mean.
static RDD<Vector> exponentialVectorRDD(SparkContext sc, double mean, long numRows, int numCols, int numPartitions, long seed)
          Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the exponential distribution with the input mean.
static JavaDoubleRDD gammaJavaRDD(JavaSparkContext jsc, double shape, double scale, long size)
          gammaJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default number of partitions and the default seed.
static JavaDoubleRDD gammaJavaRDD(JavaSparkContext jsc, double shape, double scale, long size, int numPartitions)
          gammaJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default seed.
static JavaDoubleRDD gammaJavaRDD(JavaSparkContext jsc, double shape, double scale, long size, int numPartitions, long seed)
          Java-friendly version of gammaRDD(org.apache.spark.SparkContext, double, double, long, int, long).
static JavaRDD<Vector> gammaJavaVectorRDD(JavaSparkContext jsc, double shape, double scale, long numRows, int numCols)
          gammaJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default number of partitions and the default seed.
static JavaRDD<Vector> gammaJavaVectorRDD(JavaSparkContext jsc, double shape, double scale, long numRows, int numCols, int numPartitions)
          gammaJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default seed.
static JavaRDD<Vector> gammaJavaVectorRDD(JavaSparkContext jsc, double shape, double scale, long numRows, int numCols, int numPartitions, long seed)
          Java-friendly version of gammaVectorRDD(org.apache.spark.SparkContext, double, double, long, int, int, long).
static RDD<Object> gammaRDD(SparkContext sc, double shape, double scale, long size, int numPartitions, long seed)
          Generates an RDD comprised of i.i.d. samples from the gamma distribution with the input shape and scale.
static RDD<Vector> gammaVectorRDD(SparkContext sc, double shape, double scale, long numRows, int numCols, int numPartitions, long seed)
          Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the gamma distribution with the input shape and scale.
static JavaDoubleRDD logNormalJavaRDD(JavaSparkContext jsc, double mean, double std, long size)
          logNormalJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default number of partitions and the default seed.
static JavaDoubleRDD logNormalJavaRDD(JavaSparkContext jsc, double mean, double std, long size, int numPartitions)
          logNormalJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default seed.
static JavaDoubleRDD logNormalJavaRDD(JavaSparkContext jsc, double mean, double std, long size, int numPartitions, long seed)
          Java-friendly version of logNormalRDD(org.apache.spark.SparkContext, double, double, long, int, long).
static JavaRDD<Vector> logNormalJavaVectorRDD(JavaSparkContext jsc, double mean, double std, long numRows, int numCols)
          logNormalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default number of partitions and the default seed.
static JavaRDD<Vector> logNormalJavaVectorRDD(JavaSparkContext jsc, double mean, double std, long numRows, int numCols, int numPartitions)
          logNormalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default seed.
static JavaRDD<Vector> logNormalJavaVectorRDD(JavaSparkContext jsc, double mean, double std, long numRows, int numCols, int numPartitions, long seed)
          Java-friendly version of logNormalVectorRDD(org.apache.spark.SparkContext, double, double, long, int, int, long).
static RDD<Object> logNormalRDD(SparkContext sc, double mean, double std, long size, int numPartitions, long seed)
          Generates an RDD comprised of i.i.d. samples from the log normal distribution with the input mean and standard deviation.
static RDD<Vector> logNormalVectorRDD(SparkContext sc, double mean, double std, long numRows, int numCols, int numPartitions, long seed)
          Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from a log normal distribution.
static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc, long size)
          normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed.
static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc, long size, int numPartitions)
          normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed.
static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc, long size, int numPartitions, long seed)
          Java-friendly version of normalRDD(org.apache.spark.SparkContext, long, int, long).
static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols)
          normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed.
static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions)
          normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed.
static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions, long seed)
          Java-friendly version of normalVectorRDD(org.apache.spark.SparkContext, long, int, int, long).
static RDD<Object> normalRDD(SparkContext sc, long size, int numPartitions, long seed)
          Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
static RDD<Vector> normalVectorRDD(SparkContext sc, long numRows, int numCols, int numPartitions, long seed)
          Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the standard normal distribution.
static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc, double mean, long size)
          poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default number of partitions and the default seed.
static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc, double mean, long size, int numPartitions)
          poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default seed.
static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc, double mean, long size, int numPartitions, long seed)
          Java-friendly version of poissonRDD(org.apache.spark.SparkContext, double, long, int, long).
static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols)
          poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default number of partitions and the default seed.
static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols, int numPartitions)
          poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default seed.
static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols, int numPartitions, long seed)
          Java-friendly version of poissonVectorRDD(org.apache.spark.SparkContext, double, long, int, int, long).
static RDD<Object> poissonRDD(SparkContext sc, double mean, long size, int numPartitions, long seed)
          Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
static RDD<Vector> poissonVectorRDD(SparkContext sc, double mean, long numRows, int numCols, int numPartitions, long seed)
          Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean.
static <T> RDD<T> randomRDD(SparkContext sc, RandomDataGenerator<T> generator, long size, int numPartitions, long seed, scala.reflect.ClassTag<T> evidence$1)
          :: DeveloperApi :: Generates an RDD comprised of i.i.d. samples produced by the input RandomDataGenerator.
static RDD<Vector> randomVectorRDD(SparkContext sc, RandomDataGenerator<Object> generator, long numRows, int numCols, int numPartitions, long seed)
          :: DeveloperApi :: Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the input RandomDataGenerator.
static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc, long size)
          uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed.
static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc, long size, int numPartitions)
          uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed.
static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc, long size, int numPartitions, long seed)
          Java-friendly version of uniformRDD(org.apache.spark.SparkContext, long, int, long).
static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols)
          uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed.
static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions)
          uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed.
static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions, long seed)
          Java-friendly version of uniformVectorRDD(org.apache.spark.SparkContext, long, int, int, long).
static RDD<Object> uniformRDD(SparkContext sc, long size, int numPartitions, long seed)
          Generates an RDD comprised of i.i.d. samples from the uniform distribution U(0.0, 1.0).
static RDD<Vector> uniformVectorRDD(SparkContext sc, long numRows, int numCols, int numPartitions, long seed)
          Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the uniform distribution U(0.0, 1.0).
 
Methods inherited from class Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

RandomRDDs

public RandomRDDs()
Method Detail

uniformRDD

public static RDD<Object> uniformRDD(SparkContext sc,
                                     long size,
                                     int numPartitions,
                                     long seed)
Generates an RDD comprised of i.i.d. samples from the uniform distribution U(0.0, 1.0).

To transform the distribution in the generated RDD from U(0.0, 1.0) to U(a, b), use RandomRDDs.uniformRDD(sc, n, p, seed).map(v => a + (b - a) * v).

Parameters:
sc - SparkContext used to create the RDD.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Double] comprised of i.i.d. samples ~ U(0.0, 1.0).

uniformJavaRDD

public static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc,
                                           long size,
                                           int numPartitions,
                                           long seed)
Java-friendly version of uniformRDD(org.apache.spark.SparkContext, long, int, long).

Parameters:
jsc - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

uniformJavaRDD

public static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc,
                                           long size,
                                           int numPartitions)
uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed.

Parameters:
jsc - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

uniformJavaRDD

public static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc,
                                           long size)
uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
size - (undocumented)
Returns:
(undocumented)

normalRDD

public static RDD<Object> normalRDD(SparkContext sc,
                                    long size,
                                    int numPartitions,
                                    long seed)
Generates an RDD comprised of i.i.d. samples from the standard normal distribution.

To transform the distribution in the generated RDD from standard normal to some other normal N(mean, sigma^2), use RandomRDDs.normalRDD(sc, n, p, seed).map(v => mean + sigma * v).

Parameters:
sc - SparkContext used to create the RDD.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Double] comprised of i.i.d. samples ~ N(0.0, 1.0).

normalJavaRDD

public static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc,
                                          long size,
                                          int numPartitions,
                                          long seed)
Java-friendly version of normalRDD(org.apache.spark.SparkContext, long, int, long).

Parameters:
jsc - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

normalJavaRDD

public static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc,
                                          long size,
                                          int numPartitions)
normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed.

Parameters:
jsc - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

normalJavaRDD

public static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc,
                                          long size)
normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
size - (undocumented)
Returns:
(undocumented)

poissonRDD

public static RDD<Object> poissonRDD(SparkContext sc,
                                     double mean,
                                     long size,
                                     int numPartitions,
                                     long seed)
Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.

Parameters:
sc - SparkContext used to create the RDD.
mean - Mean, or lambda, for the Poisson distribution.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Double] comprised of i.i.d. samples ~ Pois(mean).

poissonJavaRDD

public static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc,
                                           double mean,
                                           long size,
                                           int numPartitions,
                                           long seed)
Java-friendly version of poissonRDD(org.apache.spark.SparkContext, double, long, int, long).

Parameters:
jsc - (undocumented)
mean - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

poissonJavaRDD

public static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc,
                                           double mean,
                                           long size,
                                           int numPartitions)
poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

poissonJavaRDD

public static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc,
                                           double mean,
                                           long size)
poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
size - (undocumented)
Returns:
(undocumented)

exponentialRDD

public static RDD<Object> exponentialRDD(SparkContext sc,
                                         double mean,
                                         long size,
                                         int numPartitions,
                                         long seed)
Generates an RDD comprised of i.i.d. samples from the exponential distribution with the input mean.

Parameters:
sc - SparkContext used to create the RDD.
mean - Mean, or 1 / lambda, for the exponential distribution.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Double] comprised of i.i.d. samples from the exponential distribution with the given mean.

exponentialJavaRDD

public static JavaDoubleRDD exponentialJavaRDD(JavaSparkContext jsc,
                                               double mean,
                                               long size,
                                               int numPartitions,
                                               long seed)
Java-friendly version of exponentialRDD(org.apache.spark.SparkContext, double, long, int, long).

Parameters:
jsc - (undocumented)
mean - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

exponentialJavaRDD

public static JavaDoubleRDD exponentialJavaRDD(JavaSparkContext jsc,
                                               double mean,
                                               long size,
                                               int numPartitions)
exponentialJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

exponentialJavaRDD

public static JavaDoubleRDD exponentialJavaRDD(JavaSparkContext jsc,
                                               double mean,
                                               long size)
exponentialJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
size - (undocumented)
Returns:
(undocumented)

gammaRDD

public static RDD<Object> gammaRDD(SparkContext sc,
                                   double shape,
                                   double scale,
                                   long size,
                                   int numPartitions,
                                   long seed)
Generates an RDD comprised of i.i.d. samples from the gamma distribution with the input shape and scale.

Parameters:
sc - SparkContext used to create the RDD.
shape - shape parameter (> 0) for the gamma distribution
scale - scale parameter (> 0) for the gamma distribution
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Double] comprised of i.i.d. samples ~ Gamma(shape, scale).

gammaJavaRDD

public static JavaDoubleRDD gammaJavaRDD(JavaSparkContext jsc,
                                         double shape,
                                         double scale,
                                         long size,
                                         int numPartitions,
                                         long seed)
Java-friendly version of gammaRDD(org.apache.spark.SparkContext, double, double, long, int, long).

Parameters:
jsc - (undocumented)
shape - (undocumented)
scale - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

gammaJavaRDD

public static JavaDoubleRDD gammaJavaRDD(JavaSparkContext jsc,
                                         double shape,
                                         double scale,
                                         long size,
                                         int numPartitions)
gammaJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default seed.

Parameters:
jsc - (undocumented)
shape - (undocumented)
scale - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

gammaJavaRDD

public static JavaDoubleRDD gammaJavaRDD(JavaSparkContext jsc,
                                         double shape,
                                         double scale,
                                         long size)
gammaJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
shape - (undocumented)
scale - (undocumented)
size - (undocumented)
Returns:
(undocumented)

logNormalRDD

public static RDD<Object> logNormalRDD(SparkContext sc,
                                       double mean,
                                       double std,
                                       long size,
                                       int numPartitions,
                                       long seed)
Generates an RDD comprised of i.i.d. samples from the log normal distribution with the input mean and standard deviation.

Parameters:
sc - SparkContext used to create the RDD.
mean - mean for the log normal distribution
std - standard deviation for the log normal distribution
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Double] comprised of i.i.d. samples from the log normal distribution with the given mean and standard deviation.

logNormalJavaRDD

public static JavaDoubleRDD logNormalJavaRDD(JavaSparkContext jsc,
                                             double mean,
                                             double std,
                                             long size,
                                             int numPartitions,
                                             long seed)
Java-friendly version of logNormalRDD(org.apache.spark.SparkContext, double, double, long, int, long).

Parameters:
jsc - (undocumented)
mean - (undocumented)
std - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

logNormalJavaRDD

public static JavaDoubleRDD logNormalJavaRDD(JavaSparkContext jsc,
                                             double mean,
                                             double std,
                                             long size,
                                             int numPartitions)
logNormalJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
std - (undocumented)
size - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

logNormalJavaRDD

public static JavaDoubleRDD logNormalJavaRDD(JavaSparkContext jsc,
                                             double mean,
                                             double std,
                                             long size)
logNormalJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
std - (undocumented)
size - (undocumented)
Returns:
(undocumented)

randomRDD

public static <T> RDD<T> randomRDD(SparkContext sc,
                                   RandomDataGenerator<T> generator,
                                   long size,
                                   int numPartitions,
                                   long seed,
                                   scala.reflect.ClassTag<T> evidence$1)
:: DeveloperApi :: Generates an RDD comprised of i.i.d. samples produced by the input RandomDataGenerator.

Parameters:
sc - SparkContext used to create the RDD.
generator - RandomDataGenerator used to populate the RDD.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
evidence$1 - (undocumented)
Returns:
RDD[T] comprised of i.i.d. samples produced by generator.

uniformVectorRDD

public static RDD<Vector> uniformVectorRDD(SparkContext sc,
                                           long numRows,
                                           int numCols,
                                           int numPartitions,
                                           long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the uniform distribution U(0.0, 1.0).

Parameters:
sc - SparkContext used to create the RDD.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD.
seed - Seed for the RNG that generates the seed for the generator in each partition.
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ U(0.0, 1.0).

uniformJavaVectorRDD

public static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc,
                                                   long numRows,
                                                   int numCols,
                                                   int numPartitions,
                                                   long seed)
Java-friendly version of uniformVectorRDD(org.apache.spark.SparkContext, long, int, int, long).

Parameters:
jsc - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

uniformJavaVectorRDD

public static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc,
                                                   long numRows,
                                                   int numCols,
                                                   int numPartitions)
uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed.

Parameters:
jsc - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

uniformJavaVectorRDD

public static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc,
                                                   long numRows,
                                                   int numCols)
uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
Returns:
(undocumented)

normalVectorRDD

public static RDD<Vector> normalVectorRDD(SparkContext sc,
                                          long numRows,
                                          int numCols,
                                          int numPartitions,
                                          long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the standard normal distribution.

Parameters:
sc - SparkContext used to create the RDD.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).

normalJavaVectorRDD

public static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc,
                                                  long numRows,
                                                  int numCols,
                                                  int numPartitions,
                                                  long seed)
Java-friendly version of normalVectorRDD(org.apache.spark.SparkContext, long, int, int, long).

Parameters:
jsc - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
seed - (undocumented)
Returns:
(undocumented)

normalJavaVectorRDD

public static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc,
                                                  long numRows,
                                                  int numCols,
                                                  int numPartitions)
normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed.

Parameters:
jsc - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

normalJavaVectorRDD

public static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc,
                                                  long numRows,
                                                  int numCols)
normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
Returns:
(undocumented)

logNormalVectorRDD

public static RDD<Vector> logNormalVectorRDD(SparkContext sc,
                                             double mean,
                                             double std,
                                             long numRows,
                                             int numCols,
                                             int numPartitions,
                                             long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from a log normal distribution.

Parameters:
sc - SparkContext used to create the RDD.
mean - Mean of the log normal distribution.
std - Standard deviation of the log normal distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples.

logNormalJavaVectorRDD

public static JavaRDD<Vector> logNormalJavaVectorRDD(JavaSparkContext jsc,
                                                     double mean,
                                                     double std,
                                                     long numRows,
                                                     int numCols,
                                                     int numPartitions,
                                                     long seed)
Java-friendly version of logNormalVectorRDD(org.apache.spark.SparkContext, double, double, long, int, int, long).

Parameters:
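jsc - JavaSparkContext used to create the RDD.
mean - Mean of the log normal distribution.
std - Standard deviation of the log normal distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD.
seed - Random seed.
Returns:
JavaRDD<Vector> with vectors containing i.i.d. samples drawn from the log normal distribution.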

logNormalJavaVectorRDD

public static JavaRDD<Vector> logNormalJavaVectorRDD(JavaSparkContext jsc,
                                                     double mean,
                                                     double std,
                                                     long numRows,
                                                     int numCols,
                                                     int numPartitions)
logNormalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
std - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

logNormalJavaVectorRDD

public static JavaRDD<Vector> logNormalJavaVectorRDD(JavaSparkContext jsc,
                                                     double mean,
                                                     double std,
                                                     long numRows,
                                                     int numCols)
logNormalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
std - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
Returns:
(undocumented)

poissonVectorRDD

public static RDD<Vector> poissonVectorRDD(SparkContext sc,
                                           double mean,
                                           long numRows,
                                           int numCols,
                                           int numPartitions,
                                           long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean.

Parameters:
sc - SparkContext used to create the RDD.
mean - Mean, or lambda, for the Poisson distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).

poissonJavaVectorRDD

public static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc,
                                                   double mean,
                                                   long numRows,
                                                   int numCols,
                                                   int numPartitions,
                                                   long seed)
Java-friendly version of poissonVectorRDD(org.apache.spark.SparkContext, double, long, int, int, long).

Parameters:
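jsc - JavaSparkContext used to create the RDD.
mean - Mean, or lambda, for the Poisson distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD.
seed - Random seed.
Returns:
JavaRDD<Vector> with vectors containing i.i.d. samples ~ Pois(mean).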

poissonJavaVectorRDD

public static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc,
                                                   double mean,
                                                   long numRows,
                                                   int numCols,
                                                   int numPartitions)
poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

poissonJavaVectorRDD

public static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc,
                                                   double mean,
                                                   long numRows,
                                                   int numCols)
poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
Returns:
(undocumented)

exponentialVectorRDD

public static RDD<Vector> exponentialVectorRDD(SparkContext sc,
                                               double mean,
                                               long numRows,
                                               int numCols,
                                               int numPartitions,
                                               long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the exponential distribution with the input mean.

Parameters:
sc - SparkContext used to create the RDD.
mean - Mean, or 1 / lambda, for the Exponential distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ Exp(mean).

exponentialJavaVectorRDD

public static JavaRDD<Vector> exponentialJavaVectorRDD(JavaSparkContext jsc,
                                                       double mean,
                                                       long numRows,
                                                       int numCols,
                                                       int numPartitions,
                                                       long seed)
Java-friendly version of exponentialVectorRDD(org.apache.spark.SparkContext, double, long, int, int, long).

Parameters:
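jsc - JavaSparkContext used to create the RDD.
mean - Mean, or 1 / lambda, for the Exponential distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD.
seed - Random seed.
Returns:
JavaRDD<Vector> with vectors containing i.i.d. samples ~ Exp(mean).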

exponentialJavaVectorRDD

public static JavaRDD<Vector> exponentialJavaVectorRDD(JavaSparkContext jsc,
                                                       double mean,
                                                       long numRows,
                                                       int numCols,
                                                       int numPartitions)
exponentialJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

exponentialJavaVectorRDD

public static JavaRDD<Vector> exponentialJavaVectorRDD(JavaSparkContext jsc,
                                                       double mean,
                                                       long numRows,
                                                       int numCols)
exponentialJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
mean - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
Returns:
(undocumented)

gammaVectorRDD

public static RDD<Vector> gammaVectorRDD(SparkContext sc,
                                         double shape,
                                         double scale,
                                         long numRows,
                                         int numCols,
                                         int numPartitions,
                                         long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the gamma distribution with the input shape and scale.

Parameters:
sc - SparkContext used to create the RDD.
shape - Shape parameter (> 0) for the gamma distribution.
scale - Scale parameter (> 0) for the gamma distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ Gamma(shape, scale).

gammaJavaVectorRDD

public static JavaRDD<Vector> gammaJavaVectorRDD(JavaSparkContext jsc,
                                                 double shape,
                                                 double scale,
                                                 long numRows,
                                                 int numCols,
                                                 int numPartitions,
                                                 long seed)
Java-friendly version of gammaVectorRDD(org.apache.spark.SparkContext, double, double, long, int, int, long).

Parameters:
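jsc - JavaSparkContext used to create the RDD.
shape - Shape parameter (> 0) for the gamma distribution.
scale - Scale parameter (> 0) for the gamma distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD.
seed - Random seed.
Returns:
JavaRDD<Vector> with vectors containing i.i.d. samples drawn from the gamma distribution with the input shape and scale.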

gammaJavaVectorRDD

public static JavaRDD<Vector> gammaJavaVectorRDD(JavaSparkContext jsc,
                                                 double shape,
                                                 double scale,
                                                 long numRows,
                                                 int numCols,
                                                 int numPartitions)
gammaJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default seed.

Parameters:
jsc - (undocumented)
shape - (undocumented)
scale - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
numPartitions - (undocumented)
Returns:
(undocumented)

gammaJavaVectorRDD

public static JavaRDD<Vector> gammaJavaVectorRDD(JavaSparkContext jsc,
                                                 double shape,
                                                 double scale,
                                                 long numRows,
                                                 int numCols)
gammaJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, double, long, int, int, long) with the default number of partitions and the default seed.

Parameters:
jsc - (undocumented)
shape - (undocumented)
scale - (undocumented)
numRows - (undocumented)
numCols - (undocumented)
Returns:
(undocumented)

randomVectorRDD

public static RDD<Vector> randomVectorRDD(SparkContext sc,
                                          RandomDataGenerator<Object> generator,
                                          long numRows,
                                          int numCols,
                                          int numPartitions,
                                          long seed)
:: DeveloperApi :: Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the input RandomDataGenerator.

Parameters:
sc - SparkContext used to create the RDD.
generator - RandomDataGenerator used to populate the RDD.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples produced by generator.