1 """
2 PySpark is a Python API for Spark.
3
4 Public classes:
5
6 - L{SparkContext<pyspark.context.SparkContext>}
7 Main entry point for Spark functionality.
8 - L{RDD<pyspark.rdd.RDD>}
9 A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
10 - L{Broadcast<pyspark.broadcast.Broadcast>}
11 A broadcast variable that gets reused across tasks.
12 - L{Accumulator<pyspark.accumulators.Accumulator>}
13 An "add-only" shared variable that tasks can only add values to.
14 - L{SparkFiles<pyspark.files.SparkFiles>}
15 Access files shipped with jobs.
16 """
import sys
import os
# Make the Py4J egg bundled with the Spark distribution importable so the
# Python API can communicate with the JVM. SPARK_HOME must point at the
# Spark installation root; a missing variable raises KeyError here, which
# surfaces the misconfiguration immediately at import time.
# NOTE: each path component is passed to os.path.join separately — the
# original embedded a literal "/" inside one component, which is not
# portable across platforms.
sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python", "lib", "py4j0.7.egg"))
20
21
22 from pyspark.context import SparkContext
23 from pyspark.rdd import RDD
24 from pyspark.files import SparkFiles
25
26
# Names re-exported as the public API of the `pyspark` package
# (what `from pyspark import *` provides).
__all__ = ["SparkContext", "RDD", "SparkFiles"]
28