Package pyspark

Source Code for Package pyspark

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
PySpark is the Python API for Spark.

Public classes:

  - L{SparkContext<pyspark.context.SparkContext>}
      Main entry point for Spark functionality.
  - L{RDD<pyspark.rdd.RDD>}
      A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
  - L{Broadcast<pyspark.broadcast.Broadcast>}
      A broadcast variable that gets reused across tasks.
  - L{Accumulator<pyspark.accumulators.Accumulator>}
      An "add-only" shared variable that tasks can only add values to.
  - L{SparkConf<pyspark.conf.SparkConf>}
      For configuring Spark.
  - L{SparkFiles<pyspark.files.SparkFiles>}
      Access files shipped with jobs.
  - L{StorageLevel<pyspark.storagelevel.StorageLevel>}
      Finer-grained cache persistence levels.
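
Example (a minimal local-mode sketch; the application name and the
C{local[2]} master are illustrative values, and C{SPARK_HOME} must point
at a Spark installation):

    >>> conf = SparkConf().setAppName("example").setMaster("local[2]")
    >>> sc = SparkContext(conf=conf)
    >>> sc.parallelize([1, 2, 3, 4]).map(lambda x: x * 2).sum()
    20
    >>> sc.stop()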
"""


import sys
import os
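# Make the bundled Py4J sources importable: SparkContext uses Py4J to talk to
# the JVM, so this requires the SPARK_HOME environment variable to be set.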
sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))


from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.rdd import RDD
from pyspark.files import SparkFiles
from pyspark.storagelevel import StorageLevel


__all__ = ["SparkConf", "SparkContext", "RDD", "SparkFiles", "StorageLevel"]
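A short usage sketch for StorageLevel, the last of the exported classes
(illustrative, not part of the module source; it assumes an
already-constructed SparkContext named sc):

    rdd = sc.parallelize(range(1000))
    rdd.persist(StorageLevel.MEMORY_AND_DISK)  # cache in memory, spill to disk
    rdd.count()  # first action computes the RDD and populates the cache
    rdd.count()  # later actions reuse the cached partitions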