
Source Code for Module pyspark.conf

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
>>> from pyspark.conf import SparkConf
>>> from pyspark.context import SparkContext
>>> conf = SparkConf()
>>> conf.setMaster("local").setAppName("My app")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.master")
u'local'
>>> conf.get("spark.app.name")
u'My app'
>>> sc = SparkContext(conf=conf)
>>> sc.master
u'local'
>>> sc.appName
u'My app'
>>> sc.sparkHome == None
True

>>> conf = SparkConf(loadDefaults=False)
>>> conf.setSparkHome("/path")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.home")
u'/path'
>>> conf.setExecutorEnv("VAR1", "value1")
<pyspark.conf.SparkConf object at ...>
>>> conf.setExecutorEnv(pairs=[("VAR3", "value3"), ("VAR4", "value4")])
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.executorEnv.VAR1")
u'value1'
>>> print conf.toDebugString()
spark.executorEnv.VAR1=value1
spark.executorEnv.VAR3=value3
spark.executorEnv.VAR4=value4
spark.home=/path
>>> sorted(conf.getAll(), key=lambda p: p[0])
[(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), (u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
"""


class SparkConf(object):
    """
    Configuration for a Spark application. Used to set various Spark
    parameters as key-value pairs.

    Most of the time, you would create a SparkConf object with
    C{SparkConf()}, which will load values from C{spark.*} Java system
    properties as well. In this case, any parameters you set directly on
    the C{SparkConf} object take priority over system properties.

    For unit tests, you can also call C{SparkConf(false)} to skip
    loading external settings and get the same configuration no matter
    what the system properties are.

    All setter methods in this class support chaining. For example,
    you can write C{conf.setMaster("local").setAppName("My app")}.

    Note that once a SparkConf object is passed to Spark, it is cloned
    and can no longer be modified by the user.
    """

    def __init__(self, loadDefaults=True, _jvm=None, _jconf=None):
        """
        Create a new Spark configuration.

        @param loadDefaults: whether to load values from Java system
               properties (True by default)
        @param _jvm: internal parameter used to pass a handle to the
               Java VM; does not need to be set by users
        @param _jconf: Optionally pass in an existing SparkConf handle
               to use its parameters
        """
        if _jconf:
            self._jconf = _jconf
        else:
            from pyspark.context import SparkContext
            SparkContext._ensure_initialized()
            _jvm = _jvm or SparkContext._jvm
            self._jconf = _jvm.SparkConf(loadDefaults)
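        # In either branch, self._jconf ends up holding a Py4J handle to the
        # underlying Java SparkConf; the setters and getters below delegate to it.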

    def set(self, key, value):
        """Set a configuration property."""
        self._jconf.set(key, unicode(value))
        return self
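    # Note that set() coerces the value with unicode(), so non-string values
    # (ints, booleans, etc.) are stored as their string representations.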

    def setMaster(self, value):
        """Set master URL to connect to."""
        self._jconf.setMaster(value)
        return self

    def setAppName(self, value):
        """Set application name."""
        self._jconf.setAppName(value)
        return self

    def setSparkHome(self, value):
        """Set path where Spark is installed on worker nodes."""
        self._jconf.setSparkHome(value)
        return self

    def setExecutorEnv(self, key=None, value=None, pairs=None):
        """Set an environment variable to be passed to executors."""
        if (key is not None and pairs is not None) or (key is None and pairs is None):
            raise Exception("Either pass one key-value pair or a list of pairs")
        elif key is not None:
            self._jconf.setExecutorEnv(key, value)
        elif pairs is not None:
            for (k, v) in pairs:
                self._jconf.setExecutorEnv(k, v)
        return self
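    # Usage sketch (mirrors the doctests in the module docstring): pass either a
    # single key/value pair or a list of pairs via the pairs keyword, not both:
    #
    #   conf.setExecutorEnv("VAR1", "value1")
    #   conf.setExecutorEnv(pairs=[("VAR3", "value3"), ("VAR4", "value4")])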

    def setAll(self, pairs):
        """
        Set multiple parameters, passed as a list of key-value pairs.

        @param pairs: list of key-value pairs to set
        """
        for (k, v) in pairs:
            self._jconf.set(k, v)
        return self

    def get(self, key, defaultValue=None):
        """Get the configured value for some key, or return a default otherwise."""
        if defaultValue is None:  # Py4J doesn't call the right get() if we pass None
            if not self._jconf.contains(key):
                return None
            return self._jconf.get(key)
        else:
            return self._jconf.get(key, defaultValue)
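    # The contains()/get() dance above works around the issue noted in the inline
    # comment: the Java SparkConf has both get(key) and get(key, defaultValue),
    # and passing None through Py4J does not reliably pick the single-argument form.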

    def getAll(self):
        """Get all values as a list of key-value pairs."""
        pairs = []
        for elem in self._jconf.getAll():
            pairs.append((elem._1(), elem._2()))
        return pairs
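    # The Java getAll() returns an array of Scala Tuple2 objects; Py4J exposes
    # their elements through the _1() and _2() accessors used above.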

    def contains(self, key):
        """Does this configuration contain a given key?"""
        return self._jconf.contains(key)

    def toDebugString(self):
        """
        Returns a printable version of the configuration, as a list of
        key=value pairs, one per line.
        """
        return self._jconf.toDebugString()


def _test():
    import doctest
    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
    if failure_count:
        exit(-1)
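# Running this module directly executes the doctests in the module docstring;
# doctest.ELLIPSIS lets the "<pyspark.conf.SparkConf object at ...>" lines in
# the expected output match any object address.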


if __name__ == "__main__":
    _test()