public class JavaRDD<T>
extends java.lang.Object
Constructor and Description |
---|
JavaRDD(RDD<T> rdd,
scala.reflect.ClassTag<T> classTag) |
Modifier and Type | Method and Description |
---|---|
static <U> U |
aggregate(U zeroValue,
Function2<U,T,U> seqOp,
Function2<U,U,U> combOp) |
JavaRDD<T> |
cache()
Persist this RDD with the default storage level (`MEMORY_ONLY`).
|
static <U> JavaPairRDD<T,U> |
cartesian(JavaRDDLike<U,?> other) |
static void |
checkpoint() |
scala.reflect.ClassTag<T> |
classTag() |
JavaRDD<T> |
coalesce(int numPartitions)
Return a new RDD that is reduced into `numPartitions` partitions. |
JavaRDD<T> |
coalesce(int numPartitions,
boolean shuffle)
Return a new RDD that is reduced into `numPartitions` partitions. |
static java.util.List<T> |
collect() |
static JavaFutureAction<java.util.List<T>> |
collectAsync() |
static java.util.List<T>[] |
collectPartitions(int[] partitionIds) |
static SparkContext |
context() |
static long |
count() |
static PartialResult<BoundedDouble> |
countApprox(long timeout) |
static PartialResult<BoundedDouble> |
countApprox(long timeout,
double confidence) |
static long |
countApproxDistinct(double relativeSD) |
static JavaFutureAction<java.lang.Long> |
countAsync() |
static java.util.Map<T,java.lang.Long> |
countByValue() |
static PartialResult<java.util.Map<T,BoundedDouble>> |
countByValueApprox(long timeout) |
static PartialResult<java.util.Map<T,BoundedDouble>> |
countByValueApprox(long timeout,
double confidence) |
JavaRDD<T> |
distinct()
Return a new RDD containing the distinct elements in this RDD.
|
JavaRDD<T> |
distinct(int numPartitions)
Return a new RDD containing the distinct elements in this RDD.
|
JavaRDD<T> |
filter(Function<T,java.lang.Boolean> f)
Return a new RDD containing only the elements that satisfy a predicate.
|
static T |
first() |
static <U> JavaRDD<U> |
flatMap(FlatMapFunction<T,U> f) |
static JavaDoubleRDD |
flatMapToDouble(DoubleFlatMapFunction<T> f) |
static <K2,V2> JavaPairRDD<K2,V2> |
flatMapToPair(PairFlatMapFunction<T,K2,V2> f) |
static T |
fold(T zeroValue,
Function2<T,T,T> f) |
static void |
foreach(VoidFunction<T> f) |
static JavaFutureAction<java.lang.Void> |
foreachAsync(VoidFunction<T> f) |
static void |
foreachPartition(VoidFunction<java.util.Iterator<T>> f) |
static JavaFutureAction<java.lang.Void> |
foreachPartitionAsync(VoidFunction<java.util.Iterator<T>> f) |
static <T> JavaRDD<T> |
fromRDD(RDD<T> rdd,
scala.reflect.ClassTag<T> evidence$1) |
static Optional<java.lang.String> |
getCheckpointFile() |
static int |
getNumPartitions() |
static StorageLevel |
getStorageLevel() |
static JavaRDD<java.util.List<T>> |
glom() |
static <U> JavaPairRDD<U,java.lang.Iterable<T>> |
groupBy(Function<T,U> f) |
static <U> JavaPairRDD<U,java.lang.Iterable<T>> |
groupBy(Function<T,U> f,
int numPartitions) |
static int |
id() |
JavaRDD<T> |
intersection(JavaRDD<T> other)
Return the intersection of this RDD and another one.
|
static boolean |
isCheckpointed() |
static boolean |
isEmpty() |
static java.util.Iterator<T> |
iterator(Partition split,
TaskContext taskContext) |
static <U> JavaPairRDD<U,T> |
keyBy(Function<T,U> f) |
static <R> JavaRDD<R> |
map(Function<T,R> f) |
static <U> JavaRDD<U> |
mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f) |
static <U> JavaRDD<U> |
mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f,
boolean preservesPartitioning) |
static JavaDoubleRDD |
mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f) |
static JavaDoubleRDD |
mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f,
boolean preservesPartitioning) |
static <K2,V2> JavaPairRDD<K2,V2> |
mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f) |
static <K2,V2> JavaPairRDD<K2,V2> |
mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f,
boolean preservesPartitioning) |
static <R> JavaRDD<R> |
mapPartitionsWithIndex(Function2<java.lang.Integer,java.util.Iterator<T>,java.util.Iterator<R>> f,
boolean preservesPartitioning) |
static <R> boolean |
mapPartitionsWithIndex$default$2() |
static <R> JavaDoubleRDD |
mapToDouble(DoubleFunction<T> f) |
static <K2,V2> JavaPairRDD<K2,V2> |
mapToPair(PairFunction<T,K2,V2> f) |
static T |
max(java.util.Comparator<T> comp) |
static T |
min(java.util.Comparator<T> comp) |
static java.lang.String |
name() |
static Optional<Partitioner> |
partitioner() |
static java.util.List<Partition> |
partitions() |
JavaRDD<T> |
persist(StorageLevel newLevel)
Set this RDD's storage level to persist its values across operations after the first time
it is computed.
|
static JavaRDD<java.lang.String> |
pipe(java.util.List<java.lang.String> command) |
static JavaRDD<java.lang.String> |
pipe(java.util.List<java.lang.String> command,
java.util.Map<java.lang.String,java.lang.String> env) |
static JavaRDD<java.lang.String> |
pipe(java.util.List<java.lang.String> command,
java.util.Map<java.lang.String,java.lang.String> env,
boolean separateWorkingDir,
int bufferSize) |
static JavaRDD<java.lang.String> |
pipe(java.lang.String command) |
JavaRDD<T>[] |
randomSplit(double[] weights)
Randomly splits this RDD with the provided weights.
|
JavaRDD<T>[] |
randomSplit(double[] weights,
long seed)
Randomly splits this RDD with the provided weights.
|
RDD<T> |
rdd() |
static T |
reduce(Function2<T,T,T> f) |
JavaRDD<T> |
repartition(int numPartitions)
Return a new RDD that has exactly numPartitions partitions.
|
JavaRDD<T> |
sample(boolean withReplacement,
double fraction)
Return a sampled subset of this RDD.
|
JavaRDD<T> |
sample(boolean withReplacement,
double fraction,
long seed)
Return a sampled subset of this RDD.
|
static void |
saveAsObjectFile(java.lang.String path) |
static void |
saveAsTextFile(java.lang.String path) |
static void |
saveAsTextFile(java.lang.String path,
java.lang.Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec) |
JavaRDD<T> |
setName(java.lang.String name)
Assign a name to this RDD
|
<S> JavaRDD<T> |
sortBy(Function<T,S> f,
boolean ascending,
int numPartitions)
Return this RDD sorted by the given key function.
|
JavaRDD<T> |
subtract(JavaRDD<T> other)
Return an RDD with the elements from `this` that are not in `other`. |
JavaRDD<T> |
subtract(JavaRDD<T> other,
int numPartitions)
Return an RDD with the elements from `this` that are not in `other`. |
JavaRDD<T> |
subtract(JavaRDD<T> other,
Partitioner p)
Return an RDD with the elements from `this` that are not in `other`. |
static java.util.List<T> |
take(int num) |
static JavaFutureAction<java.util.List<T>> |
takeAsync(int num) |
static java.util.List<T> |
takeOrdered(int num) |
static java.util.List<T> |
takeOrdered(int num,
java.util.Comparator<T> comp) |
static java.util.List<T> |
takeSample(boolean withReplacement,
int num) |
static java.util.List<T> |
takeSample(boolean withReplacement,
int num,
long seed) |
static java.lang.String |
toDebugString() |
static java.util.Iterator<T> |
toLocalIterator() |
static java.util.List<T> |
top(int num) |
static java.util.List<T> |
top(int num,
java.util.Comparator<T> comp) |
static <T> RDD<T> |
toRDD(JavaRDD<T> rdd) |
java.lang.String |
toString() |
static <U> U |
treeAggregate(U zeroValue,
Function2<U,T,U> seqOp,
Function2<U,U,U> combOp) |
static <U> U |
treeAggregate(U zeroValue,
Function2<U,T,U> seqOp,
Function2<U,U,U> combOp,
int depth) |
static T |
treeReduce(Function2<T,T,T> f) |
static T |
treeReduce(Function2<T,T,T> f,
int depth) |
JavaRDD<T> |
union(JavaRDD<T> other)
Return the union of this RDD and another one.
|
JavaRDD<T> |
unpersist()
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
|
JavaRDD<T> |
unpersist(boolean blocking)
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
|
JavaRDD<T> |
wrapRDD(RDD<T> rdd) |
static <U> JavaPairRDD<T,U> |
zip(JavaRDDLike<U,?> other) |
static <U,V> JavaRDD<V> |
zipPartitions(JavaRDDLike<U,?> other,
FlatMapFunction2<java.util.Iterator<T>,java.util.Iterator<U>,V> f) |
static JavaPairRDD<T,java.lang.Long> |
zipWithIndex() |
static JavaPairRDD<T,java.lang.Long> |
zipWithUniqueId() |
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
aggregate, cartesian, checkpoint, collect, collectAsync, collectPartitions, context, count, countApprox, countApprox, countApproxDistinct, countAsync, countByValue, countByValueApprox, countByValueApprox, first, flatMap, flatMapToDouble, flatMapToPair, fold, foreach, foreachAsync, foreachPartition, foreachPartitionAsync, getCheckpointFile, getNumPartitions, getStorageLevel, glom, groupBy, groupBy, id, isCheckpointed, isEmpty, iterator, keyBy, map, mapPartitions, mapPartitions, mapPartitionsToDouble, mapPartitionsToDouble, mapPartitionsToPair, mapPartitionsToPair, mapPartitionsWithIndex, mapToDouble, mapToPair, max, min, name, partitioner, partitions, pipe, pipe, pipe, pipe, reduce, saveAsObjectFile, saveAsTextFile, saveAsTextFile, take, takeAsync, takeOrdered, takeOrdered, takeSample, takeSample, toDebugString, toLocalIterator, top, top, treeAggregate, treeAggregate, treeReduce, treeReduce, zip, zipPartitions, zipWithIndex, zipWithUniqueId
public static java.util.List<Partition> partitions()
public static int getNumPartitions()
public static Optional<Partitioner> partitioner()
public static SparkContext context()
public static int id()
public static StorageLevel getStorageLevel()
public static java.util.Iterator<T> iterator(Partition split, TaskContext taskContext)
public static <R> JavaRDD<R> mapPartitionsWithIndex(Function2<java.lang.Integer,java.util.Iterator<T>,java.util.Iterator<R>> f, boolean preservesPartitioning)
public static <R> JavaDoubleRDD mapToDouble(DoubleFunction<T> f)
public static <K2,V2> JavaPairRDD<K2,V2> mapToPair(PairFunction<T,K2,V2> f)
public static <U> JavaRDD<U> flatMap(FlatMapFunction<T,U> f)
public static JavaDoubleRDD flatMapToDouble(DoubleFlatMapFunction<T> f)
public static <K2,V2> JavaPairRDD<K2,V2> flatMapToPair(PairFlatMapFunction<T,K2,V2> f)
public static <U> JavaRDD<U> mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f)
public static <U> JavaRDD<U> mapPartitions(FlatMapFunction<java.util.Iterator<T>,U> f, boolean preservesPartitioning)
public static JavaDoubleRDD mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f)
public static <K2,V2> JavaPairRDD<K2,V2> mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f)
public static JavaDoubleRDD mapPartitionsToDouble(DoubleFlatMapFunction<java.util.Iterator<T>> f, boolean preservesPartitioning)
public static <K2,V2> JavaPairRDD<K2,V2> mapPartitionsToPair(PairFlatMapFunction<java.util.Iterator<T>,K2,V2> f, boolean preservesPartitioning)
public static void foreachPartition(VoidFunction<java.util.Iterator<T>> f)
public static JavaRDD<java.util.List<T>> glom()
public static <U> JavaPairRDD<T,U> cartesian(JavaRDDLike<U,?> other)
public static <U> JavaPairRDD<U,java.lang.Iterable<T>> groupBy(Function<T,U> f)
public static <U> JavaPairRDD<U,java.lang.Iterable<T>> groupBy(Function<T,U> f, int numPartitions)
public static JavaRDD<java.lang.String> pipe(java.lang.String command)
public static JavaRDD<java.lang.String> pipe(java.util.List<java.lang.String> command)
public static JavaRDD<java.lang.String> pipe(java.util.List<java.lang.String> command, java.util.Map<java.lang.String,java.lang.String> env)
public static JavaRDD<java.lang.String> pipe(java.util.List<java.lang.String> command, java.util.Map<java.lang.String,java.lang.String> env, boolean separateWorkingDir, int bufferSize)
public static <U> JavaPairRDD<T,U> zip(JavaRDDLike<U,?> other)
public static <U,V> JavaRDD<V> zipPartitions(JavaRDDLike<U,?> other, FlatMapFunction2<java.util.Iterator<T>,java.util.Iterator<U>,V> f)
public static JavaPairRDD<T,java.lang.Long> zipWithUniqueId()
public static JavaPairRDD<T,java.lang.Long> zipWithIndex()
public static void foreach(VoidFunction<T> f)
public static java.util.List<T> collect()
public static java.util.Iterator<T> toLocalIterator()
public static java.util.List<T>[] collectPartitions(int[] partitionIds)
public static T reduce(Function2<T,T,T> f)
public static T treeReduce(Function2<T,T,T> f, int depth)
public static T treeReduce(Function2<T,T,T> f)
public static T fold(T zeroValue, Function2<T,T,T> f)
public static <U> U aggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp)
public static <U> U treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp, int depth)
public static <U> U treeAggregate(U zeroValue, Function2<U,T,U> seqOp, Function2<U,U,U> combOp)
public static long count()
public static PartialResult<BoundedDouble> countApprox(long timeout, double confidence)
public static PartialResult<BoundedDouble> countApprox(long timeout)
public static java.util.Map<T,java.lang.Long> countByValue()
public static PartialResult<java.util.Map<T,BoundedDouble>> countByValueApprox(long timeout, double confidence)
public static PartialResult<java.util.Map<T,BoundedDouble>> countByValueApprox(long timeout)
public static java.util.List<T> take(int num)
public static java.util.List<T> takeSample(boolean withReplacement, int num)
public static java.util.List<T> takeSample(boolean withReplacement, int num, long seed)
public static T first()
public static boolean isEmpty()
public static void saveAsTextFile(java.lang.String path)
public static void saveAsTextFile(java.lang.String path, java.lang.Class<? extends org.apache.hadoop.io.compress.CompressionCodec> codec)
public static void saveAsObjectFile(java.lang.String path)
public static <U> JavaPairRDD<U,T> keyBy(Function<T,U> f)
public static void checkpoint()
public static boolean isCheckpointed()
public static Optional<java.lang.String> getCheckpointFile()
public static java.lang.String toDebugString()
public static java.util.List<T> top(int num, java.util.Comparator<T> comp)
public static java.util.List<T> top(int num)
public static java.util.List<T> takeOrdered(int num, java.util.Comparator<T> comp)
public static T max(java.util.Comparator<T> comp)
public static T min(java.util.Comparator<T> comp)
public static java.util.List<T> takeOrdered(int num)
public static long countApproxDistinct(double relativeSD)
public static java.lang.String name()
public static JavaFutureAction<java.lang.Long> countAsync()
public static JavaFutureAction<java.util.List<T>> collectAsync()
public static JavaFutureAction<java.util.List<T>> takeAsync(int num)
public static JavaFutureAction<java.lang.Void> foreachAsync(VoidFunction<T> f)
public static JavaFutureAction<java.lang.Void> foreachPartitionAsync(VoidFunction<java.util.Iterator<T>> f)
public static <R> boolean mapPartitionsWithIndex$default$2()
public scala.reflect.ClassTag<T> classTag()
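public JavaRDD<T> persist(StorageLevel newLevel)
Set this RDD's storage level to persist its values across operations after the first time it is computed.
Parameters:
newLevel - (undocumented)
public JavaRDD<T> unpersist()
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.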
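public JavaRDD<T> unpersist(boolean blocking)
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
Parameters:
blocking - Whether to block until all blocks are deleted.
public JavaRDD<T> distinct()
Return a new RDD containing the distinct elements in this RDD.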
public JavaRDD<T> distinct(int numPartitions)
Return a new RDD containing the distinct elements in this RDD.
Parameters:
numPartitions - (undocumented)
public JavaRDD<T> filter(Function<T,java.lang.Boolean> f)
Return a new RDD containing only the elements that satisfy a predicate.
Parameters:
f - (undocumented)
public JavaRDD<T> coalesce(int numPartitions)
Return a new RDD that is reduced into `numPartitions` partitions.
Parameters:
numPartitions - (undocumented)
public JavaRDD<T> coalesce(int numPartitions, boolean shuffle)
Return a new RDD that is reduced into `numPartitions` partitions.
Parameters:
numPartitions - (undocumented)
shuffle - (undocumented)
public JavaRDD<T> repartition(int numPartitions)
Return a new RDD that has exactly numPartitions partitions.
Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data.
If you are decreasing the number of partitions in this RDD, consider using `coalesce`, which can avoid performing a shuffle.
Parameters:
numPartitions - (undocumented)
public JavaRDD<T> sample(boolean withReplacement, double fraction)
Return a sampled subset of this RDD.
Parameters:
withReplacement - can elements be sampled multiple times (replaced when sampled out)
fraction - expected size of the sample as a fraction of this RDD's size
without replacement: probability that each element is chosen; fraction must be [0, 1]
with replacement: expected number of times each element is chosen; fraction must be >= 0
public JavaRDD<T> sample(boolean withReplacement, double fraction, long seed)
Return a sampled subset of this RDD.
Parameters:
withReplacement - can elements be sampled multiple times (replaced when sampled out)
fraction - expected size of the sample as a fraction of this RDD's size
without replacement: probability that each element is chosen; fraction must be [0, 1]
with replacement: expected number of times each element is chosen; fraction must be >= 0
seed - seed for the random number generator
public JavaRDD<T>[] randomSplit(double[] weights)
Randomly splits this RDD with the provided weights.
Parameters:
weights - weights for splits, will be normalized if they don't sum to 1
public JavaRDD<T>[] randomSplit(double[] weights, long seed)
Randomly splits this RDD with the provided weights.
Parameters:
weights - weights for splits, will be normalized if they don't sum to 1
seed - random seed
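public JavaRDD<T> union(JavaRDD<T> other)
Return the union of this RDD and another one. Any identical elements will appear multiple times (use `.distinct()` to eliminate them).
Parameters:
other - (undocumented)
public JavaRDD<T> intersection(JavaRDD<T> other)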
Return the intersection of this RDD and another one.
Note that this method performs a shuffle internally.
Parameters:
other - (undocumented)
public JavaRDD<T> subtract(JavaRDD<T> other)
Return an RDD with the elements from `this` that are not in `other`.
Uses `this` partitioner/partition size, because even if `other` is huge, the resulting RDD will be <= us.
Parameters:
other - (undocumented)
public JavaRDD<T> subtract(JavaRDD<T> other, int numPartitions)
Return an RDD with the elements from `this` that are not in `other`.
Parameters:
other - (undocumented)
numPartitions - (undocumented)
public JavaRDD<T> subtract(JavaRDD<T> other, Partitioner p)
Return an RDD with the elements from `this` that are not in `other`.
Parameters:
other - (undocumented)
p - (undocumented)
public java.lang.String toString()
Overrides:
toString in class java.lang.Object