public class JavaDoubleRDD
extends Object
Constructor and Description |
---|
JavaDoubleRDD(RDD<Object> srdd) |
Modifier and Type | Method and Description |
---|---|
static <U> U |
aggregate(U zeroValue,
Function2<U,T,U> seqOp,
Function2<U,U,U> combOp) |
JavaDoubleRDD |
cache()
Persist this RDD with the default storage level (MEMORY_ONLY). |
static <U> JavaPairRDD<T,U> |
cartesian(JavaRDDLike<U,?> other) |
static void |
checkpoint() |
scala.reflect.ClassTag<Double> |
classTag() |
JavaDoubleRDD |
coalesce(int numPartitions)
Return a new RDD that is reduced into
numPartitions partitions. |
JavaDoubleRDD |
coalesce(int numPartitions,
boolean shuffle)
Return a new RDD that is reduced into
numPartitions partitions. |
static java.util.List<T> |
collect() |
static JavaFutureAction<java.util.List<T>> |
collectAsync() |
static java.util.List<T>[] |
collectPartitions(int[] partitionIds) |
static SparkContext |
context() |
static long |
count() |
static PartialResult<BoundedDouble> |
countApprox(long timeout) |
static PartialResult<BoundedDouble> |
countApprox(long timeout,
double confidence) |
static long |
countApproxDistinct(double relativeSD) |
static JavaFutureAction<Long> |
countAsync() |
static java.util.Map<T,Long> |
countByValue() |
static PartialResult<java.util.Map<T,BoundedDouble>> |
countByValueApprox(long timeout) |
static PartialResult<java.util.Map<T,BoundedDouble>> |
countByValueApprox(long timeout,
double confidence) |
JavaDoubleRDD |
distinct()
Return a new RDD containing the distinct elements in this RDD.
|
JavaDoubleRDD |
distinct(int numPartitions)
Return a new RDD containing the distinct elements in this RDD.
|
JavaDoubleRDD |
filter(Function<Double,Boolean> f)
Return a new RDD containing only the elements that satisfy a predicate.
|
Double |
first()
Return the first element in this RDD.
|
static <U> JavaRDD<U> |
flatMap(FlatMapFunction<T,U> f) |
static JavaDoubleRDD |
flatMapToDouble(DoubleFlatMapFunction<T> f) |
static <K2,V2> JavaPairRDD<K2,V2> |
flatMapToPair(PairFlatMapFunction<T,K2,V2> f) |
static T |
fold(T zeroValue,
Function2<T,T,T> f) |
static void |
foreach(VoidFunction<T> f) |
static JavaFutureAction<Void> |
foreachAsync(VoidFunction<T> f) |
static void |
foreachPartition(VoidFunction<java.util.Iterator<T>> f) |
static JavaFutureAction<Void> |
foreachPartitionAsync(VoidFunction<java.util.Iterator<T>> f) |
static JavaDoubleRDD |
fromRDD(RDD<Object> rdd) |
static Optional<String> |
getCheckpointFile() |
static int |
getNumPartitions() |
static StorageLevel |
getStorageLevel() |
static JavaRDD<java.util.List<T>> |
glom() |
static <U> JavaPairRDD<U,Iterable<T>> |
groupBy(Function<T,U> f) |
static <U> JavaPairRDD<U,Iterable<T>> |
groupBy(Function<T,U> f,
int numPartitions) |
long[] |
histogram(double[] buckets)
Compute a histogram using the provided buckets.
|
long[] |
histogram(Double[] buckets,
boolean evenBuckets) |
scala.Tuple2<double[],long[]> |
histogram(int bucketCount)
Compute a histogram of the data using bucketCount number of buckets evenly
spaced between the minimum and maximum of the RDD.
|
static int |
id() |
JavaDoubleRDD |
intersection(JavaDoubleRDD other)
Return the intersection of this RDD and another one.
|
static boolean |
isCheckpointed() |
static boolean |
isEmpty() |
static java.util.Iterator<T> |
iterator(Partition split,
TaskContext taskContext) |
static <U> JavaPairRDD<U,T> |
keyBy(Function<T,U> f) |
static <R> JavaRDD<R> |
map(Function<T,R> f) |
static <U> JavaRDD<U> |
mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f) |
static <U> JavaRDD<U> |
mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f,
boolean preservesPartitioning) |
static JavaDoubleRDD |
mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f) |
static JavaDoubleRDD |
mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f,
boolean preservesPartitioning) |
static <K2,V2> JavaPairRDD<K2,V2> |
mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f) |
static <K2,V2> JavaPairRDD<K2,V2> |
mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f,
boolean preservesPartitioning) |
static <R> JavaRDD<R> |
mapPartitionsWithIndex(Function2<Integer,java.util.Iterator<T>,java.util.Iterator<R>> f,
boolean preservesPartitioning) |
static <R> boolean |
mapPartitionsWithIndex$default$2() |
static <R> JavaDoubleRDD |
mapToDouble(DoubleFunction<T> f) |
static <K2,V2> JavaPairRDD<K2,V2> |
mapToPair(PairFunction<T,K2,V2> f) |
Double |
max()
Returns the maximum element from this RDD as defined by
the default comparator natural order.
|
Double |
mean()
Compute the mean of this RDD's elements.
|
PartialResult<BoundedDouble> |
meanApprox(long timeout)
Approximate operation to return the mean within a timeout.
|
PartialResult<BoundedDouble> |
meanApprox(long timeout,
Double confidence)
Return the approximate mean of the elements in this RDD.
|
Double |
min()
Returns the minimum element from this RDD as defined by
the default comparator natural order.
|
static String |
name() |
static Optional<Partitioner> |
partitioner() |
static java.util.List<Partition> |
partitions() |
JavaDoubleRDD |
persist(StorageLevel newLevel)
Set this RDD's storage level to persist its values across operations after the first time
it is computed.
|
static JavaRDD<String> |
pipe(java.util.List<String> command) |
static JavaRDD<String> |
pipe(java.util.List<String> command,
java.util.Map<String,String> env) |
static JavaRDD<String> |
pipe(java.util.List<String> command,
java.util.Map<String,String> env,
boolean separateWorkingDir,
int bufferSize) |
static JavaRDD<String> |
pipe(java.util.List<String> command,
java.util.Map<String,String> env,
boolean separateWorkingDir,
int bufferSize,
String encoding) |
static JavaRDD<String> |
pipe(String command) |
Double |
popStdev()
Compute the population standard deviation of this RDD's elements.
|
Double |
popVariance()
Compute the population variance of this RDD's elements.
|
RDD<Double> |
rdd() |
static T |
reduce(Function2<T,T,T> f) |
JavaDoubleRDD |
repartition(int numPartitions)
Return a new RDD that has exactly numPartitions partitions.
|
JavaDoubleRDD |
sample(boolean withReplacement,
Double fraction)
Return a sampled subset of this RDD.
|
JavaDoubleRDD |
sample(boolean withReplacement,
Double fraction,
long seed)
Return a sampled subset of this RDD.
|
Double |
sampleStdev()
Compute the sample standard deviation of this RDD's elements (which corrects for bias in
estimating the standard deviation by dividing by N-1 instead of N).
|
Double |
sampleVariance()
Compute the sample variance of this RDD's elements (which corrects for bias in
estimating the variance by dividing by N-1 instead of N).
|
static void |
saveAsObjectFile(String path) |
static void |
saveAsTextFile(String path) |
static void |
saveAsTextFile(String path,
Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec) |
JavaDoubleRDD |
setName(String name)
Assign a name to this RDD.
|
RDD<Object> |
srdd() |
StatCounter |
stats()
Return a `StatCounter` object that captures the mean, variance and
count of the RDD's elements in one operation. |
Double |
stdev()
Compute the population standard deviation of this RDD's elements.
|
JavaDoubleRDD |
subtract(JavaDoubleRDD other)
Return an RDD with the elements from `this` that are not in `other`. |
JavaDoubleRDD |
subtract(JavaDoubleRDD other,
int numPartitions)
Return an RDD with the elements from `this` that are not in `other`. |
JavaDoubleRDD |
subtract(JavaDoubleRDD other,
Partitioner p)
Return an RDD with the elements from `this` that are not in `other`. |
Double |
sum()
Add up the elements in this RDD.
|
PartialResult<BoundedDouble> |
sumApprox(long timeout)
Approximate operation to return the sum within a timeout.
|
PartialResult<BoundedDouble> |
sumApprox(long timeout,
Double confidence)
Approximate operation to return the sum within a timeout.
|
static java.util.List<T> |
take(int num) |
static JavaFutureAction<java.util.List<T>> |
takeAsync(int num) |
static java.util.List<T> |
takeOrdered(int num) |
static java.util.List<T> |
takeOrdered(int num,
java.util.Comparator<T> comp) |
static java.util.List<T> |
takeSample(boolean withReplacement,
int num) |
static java.util.List<T> |
takeSample(boolean withReplacement,
int num,
long seed) |
static String |
toDebugString() |
static java.util.Iterator<T> |
toLocalIterator() |
static java.util.List<T> |
top(int num) |
static java.util.List<T> |
top(int num,
java.util.Comparator<T> comp) |
static RDD<Object> |
toRDD(JavaDoubleRDD rdd) |
static <U> U |
treeAggregate(U zeroValue,
Function2<U,T,U> seqOp,
Function2<U,U,U> combOp) |
static <U> U |
treeAggregate(U zeroValue,
Function2<U,T,U> seqOp,
Function2<U,U,U> combOp,
int depth) |
static T |
treeReduce(Function2<T,T,T> f) |
static T |
treeReduce(Function2<T,T,T> f,
int depth) |
JavaDoubleRDD |
union(JavaDoubleRDD other)
Return the union of this RDD and another one.
|
JavaDoubleRDD |
unpersist()
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
|
JavaDoubleRDD |
unpersist(boolean blocking)
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
|
Double |
variance()
Compute the population variance of this RDD's elements.
|
JavaDoubleRDD |
wrapRDD(RDD<Double> rdd) |
static <U> JavaPairRDD<T,U> |
zip(JavaRDDLike<U,?> other) |
static <U,V> JavaRDD<V> |
zipPartitions(JavaRDDLike<U,?> other,
FlatMapFunction2<java.util.Iterator<T>,java.util.Iterator<U>,V> f) |
static JavaPairRDD<T,Long> |
zipWithIndex() |
static JavaPairRDD<T,Long> |
zipWithUniqueId() |
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
aggregate, cartesian, checkpoint, collect, collectAsync, collectPartitions, context, count, countApprox, countApprox, countApproxDistinct, countAsync, countByValue, countByValueApprox, countByValueApprox, flatMap, flatMapToDouble, flatMapToPair, fold, foreach, foreachAsync, foreachPartition, foreachPartitionAsync, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, id, isCheckpointed, isEmpty, iterator, keyBy, map, mapPartitions, mapPartitions, mapPartitionsToDouble, mapPartitionsToDouble, mapPartitionsToPair, mapPartitionsToPair, mapPartitionsWithIndex, mapToDouble, mapToPair, max, min, name, partitioner, partitions, pipe, pipe, pipe, pipe, pipe, reduce, saveAsObjectFile, saveAsTextFile, saveAsTextFile, take, takeAsync, takeOrdered, takeOrdered, takeSample, takeSample, toDebugString, toLocalIterator, top, top, treeAggregate, treeAggregate, treeReduce, treeReduce, zip, zipPartitions, zipWithIndex, zipWithUniqueId
public JavaDoubleRDD(RDD<Object> srdd)
public static JavaDoubleRDD fromRDD(RDD<Object> rdd)
public static RDD<Object> toRDD(JavaDoubleRDD rdd)
public static java.util.List<Partition> partitions()
public static int getNumPartitions()
public static Optional<Partitioner> partitioner()
public static SparkContext context()
public static int id()
public static StorageLevel getStorageLevel()
public static java.util.Iterator<T> iterator(Partition split, TaskContext taskContext)
public static <R> JavaRDD<R> mapPartitionsWithIndex(Function2<Integer,java.util.Iterator<T>,java.util.Iterator<R>> f, boolean preservesPartitioning)
public static <R> JavaDoubleRDD mapToDouble(DoubleFunction<T> f)
public static <K2,V2> JavaPairRDD<K2,V2> mapToPair(PairFunction<T,K2,V2> f)
public static <U> JavaRDD<U> flatMap(FlatMapFunction<T,U> f)
public static JavaDoubleRDD flatMapToDouble(DoubleFlatMapFunction<T> f)
public static <K2,V2> JavaPairRDD<K2,V2> flatMapToPair(PairFlatMapFunction<T,K2,V2> f)
public static <U> JavaRDD<U> mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f)
public static <U> JavaRDD<U> mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f, boolean preservesPartitioning)
public static JavaDoubleRDD mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f)
public static <K2,V2> JavaPairRDD<K2,V2> mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f)
public static JavaDoubleRDD mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f, boolean preservesPartitioning)
public static <K2,V2> JavaPairRDD<K2,V2> mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f, boolean preservesPartitioning)
public static void foreachPartition(VoidFunction<java.util.Iterator<T>> f)
public static JavaRDD<java.util.List<T>> glom()
public static <U> JavaPairRDD<T,U> cartesian(JavaRDDLike<U,?> other)
public static <U> JavaPairRDD<U,Iterable<T>> groupBy(Function<T,U> f)
public static <U> JavaPairRDD<U,Iterable<T>> groupBy(Function<T,U> f, int numPartitions)
public static JavaRDD<String> pipe(String command)
public static JavaRDD<String> pipe(java.util.List<String> command)
public static JavaRDD<String> pipe(java.util.List<String> command, java.util.Map<String,String> env)
public static JavaRDD<String> pipe(java.util.List<String> command, java.util.Map<String,String> env, boolean separateWorkingDir, int bufferSize)
public static JavaRDD<String> pipe(java.util.List<String> command, java.util.Map<String,String> env, boolean separateWorkingDir, int bufferSize, String encoding)
public static <U> JavaPairRDD<T,U> zip(JavaRDDLike<U,?> other)
public static <U,V> JavaRDD<V> zipPartitions(JavaRDDLike<U,?> other, FlatMapFunction2<java.util.Iterator<T>,java.util.Iterator<U>,V> f)
public static JavaPairRDD<T,Long> zipWithUniqueId()
public static JavaPairRDD<T,Long> zipWithIndex()
public static void foreach(VoidFunction<T> f)
public static java.util.List<T> collect()
public static java.util.Iterator<T> toLocalIterator()
public static java.util.List<T>[] collectPartitions(int[] partitionIds)
public static T reduce(Function2<T,T,T> f)
public static T treeReduce(Function2<T,T,T> f, int depth)
public static T treeReduce(Function2<T,T,T> f)
public static T fold(T zeroValue, Function2<T,T,T> f)
public static <U> U aggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp)
public static <U> U treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp, int depth)
public static <U> U treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp)
public static long count()
public static PartialResult<BoundedDouble> countApprox(long timeout, double confidence)
public static PartialResult<BoundedDouble> countApprox(long timeout)
public static java.util.Map<T,Long> countByValue()
public static PartialResult<java.util.Map<T,BoundedDouble>> countByValueApprox(long timeout, double confidence)
public static PartialResult<java.util.Map<T,BoundedDouble>> countByValueApprox(long timeout)
public static java.util.List<T> take(int num)
public static java.util.List<T> takeSample(boolean withReplacement, int num)
public static java.util.List<T> takeSample(boolean withReplacement, int num, long seed)
public static boolean isEmpty()
public static void saveAsTextFile(String path)
public static void saveAsTextFile(String path, Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec)
public static void saveAsObjectFile(String path)
public static <U> JavaPairRDD<U,T> keyBy(Function<T,U> f)
public static void checkpoint()
public static boolean isCheckpointed()
public static Optional<String> getCheckpointFile()
public static String toDebugString()
public static java.util.List<T> top(int num, java.util.Comparator<T> comp)
public static java.util.List<T> top(int num)
public static java.util.List<T> takeOrdered(int num, java.util.Comparator<T> comp)
public static java.util.List<T> takeOrdered(int num)
public static long countApproxDistinct(double relativeSD)
public static String name()
public static JavaFutureAction<Long> countAsync()
public static JavaFutureAction<java.util.List<T>> collectAsync()
public static JavaFutureAction<java.util.List<T>> takeAsync(int num)
public static JavaFutureAction<Void> foreachAsync(VoidFunction<T> f)
public static JavaFutureAction<Void> foreachPartitionAsync(VoidFunction<java.util.Iterator<T>> f)
public static <R> boolean mapPartitionsWithIndex$default$2()
public RDD<Object> srdd()
public scala.reflect.ClassTag<Double> classTag()
public RDD<Double> rdd()
public JavaDoubleRDD wrapRDD(RDD<Double> rdd)
public JavaDoubleRDD cache()
Persist this RDD with the default storage level (MEMORY_ONLY).
public JavaDoubleRDD persist(StorageLevel newLevel)
newLevel
- (undocumented)public JavaDoubleRDD unpersist()
public JavaDoubleRDD unpersist(boolean blocking)
blocking - Whether to block until all blocks are deleted.
public Double first()
JavaRDDLike
public JavaDoubleRDD distinct()
public JavaDoubleRDD distinct(int numPartitions)
numPartitions
- (undocumented)public JavaDoubleRDD filter(Function<Double,Boolean> f)
f
- (undocumented)public JavaDoubleRDD coalesce(int numPartitions)
Return a new RDD that is reduced into `numPartitions` partitions.
numPartitions
- (undocumented)public JavaDoubleRDD coalesce(int numPartitions, boolean shuffle)
Return a new RDD that is reduced into `numPartitions` partitions.
numPartitions
- (undocumented)shuffle
- (undocumented)public JavaDoubleRDD repartition(int numPartitions)
Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data.
If you are decreasing the number of partitions in this RDD, consider using `coalesce`,
which can avoid performing a shuffle.
numPartitions
- (undocumented)public JavaDoubleRDD subtract(JavaDoubleRDD other)
Return an RDD with the elements from `this` that are not in `other`.
Uses `this` partitioner/partition size, because even if `other` is huge, the resulting
RDD will be <= us.
other
- (undocumented)public JavaDoubleRDD subtract(JavaDoubleRDD other, int numPartitions)
Return an RDD with the elements from `this` that are not in `other`.
other
- (undocumented)numPartitions
- (undocumented)public JavaDoubleRDD subtract(JavaDoubleRDD other, Partitioner p)
Return an RDD with the elements from `this` that are not in `other`.
other
- (undocumented)p
- (undocumented)public JavaDoubleRDD sample(boolean withReplacement, Double fraction)
withReplacement
- (undocumented)fraction
- (undocumented)public JavaDoubleRDD sample(boolean withReplacement, Double fraction, long seed)
withReplacement
- (undocumented)fraction
- (undocumented)seed
- (undocumented)public JavaDoubleRDD union(JavaDoubleRDD other)
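Return the union of this RDD and another one. Any identical elements will appear multiple times
(use `.distinct()` to eliminate them).
other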
- (undocumented)public JavaDoubleRDD intersection(JavaDoubleRDD other)
other
- (undocumented)public Double sum()
public Double min()
public Double max()
public StatCounter stats()
Return a `StatCounter` object that captures the mean, variance and
count of the RDD's elements in one operation.
public Double mean()
public Double variance()
public Double stdev()
public Double sampleStdev()
public Double sampleVariance()
public Double popStdev()
public Double popVariance()
public PartialResult<BoundedDouble> meanApprox(long timeout, Double confidence)
public PartialResult<BoundedDouble> meanApprox(long timeout)
timeout
- (undocumented)public PartialResult<BoundedDouble> sumApprox(long timeout, Double confidence)
timeout
- (undocumented)confidence
- (undocumented)public PartialResult<BoundedDouble> sumApprox(long timeout)
timeout
- (undocumented)public scala.Tuple2<double[],long[]> histogram(int bucketCount)
bucketCount
- (undocumented)public long[] histogram(double[] buckets)
buckets
- (undocumented)public long[] histogram(Double[] buckets, boolean evenBuckets)
public JavaDoubleRDD setName(String name)