
Source Code for Module pyspark.conf

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
>>> from pyspark.conf import SparkConf
>>> from pyspark.context import SparkContext
>>> conf = SparkConf()
>>> conf.setMaster("local").setAppName("My app")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.master")
u'local'
>>> conf.get("spark.app.name")
u'My app'
>>> sc = SparkContext(conf=conf)
>>> sc.master
u'local'
>>> sc.appName
u'My app'
>>> sc.sparkHome is None
True

>>> conf = SparkConf(loadDefaults=False)
>>> conf.setSparkHome("/path")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.home")
u'/path'
>>> conf.setExecutorEnv("VAR1", "value1")
<pyspark.conf.SparkConf object at ...>
>>> conf.setExecutorEnv(pairs=[("VAR3", "value3"), ("VAR4", "value4")])
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.executorEnv.VAR1")
u'value1'
>>> print conf.toDebugString()
spark.executorEnv.VAR1=value1
spark.executorEnv.VAR3=value3
spark.executorEnv.VAR4=value4
spark.home=/path
>>> sorted(conf.getAll(), key=lambda p: p[0])
[(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), \
(u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
"""


class SparkConf(object):

    """
    Configuration for a Spark application. Used to set various Spark
    parameters as key-value pairs.

    Most of the time, you would create a SparkConf object with
    C{SparkConf()}, which will load values from C{spark.*} Java system
    properties as well. In this case, any parameters you set directly on
    the C{SparkConf} object take priority over system properties.

    For unit tests, you can also call C{SparkConf(False)} to skip
    loading external settings and get the same configuration no matter
    what the system properties are.

    All setter methods in this class support chaining. For example,
    you can write C{conf.setMaster("local").setAppName("My app")}.

    Note that once a SparkConf object is passed to Spark, it is cloned
    and can no longer be modified by the user.
    """

    def __init__(self, loadDefaults=True, _jvm=None, _jconf=None):
        """
        Create a new Spark configuration.

        @param loadDefaults: whether to load values from Java system
            properties (True by default)
        @param _jvm: internal parameter used to pass a handle to the
            Java VM; does not need to be set by users
        @param _jconf: Optionally pass in an existing SparkConf handle
            to use its parameters
        """
        if _jconf:
            self._jconf = _jconf
        else:
            from pyspark.context import SparkContext
            SparkContext._ensure_initialized()
            _jvm = _jvm or SparkContext._jvm
            self._jconf = _jvm.SparkConf(loadDefaults)

    def set(self, key, value):
        """Set a configuration property."""
        self._jconf.set(key, unicode(value))
        return self

    def setIfMissing(self, key, value):
        """Set a configuration property, if not already set."""
        if self.get(key) is None:
            self.set(key, value)
        return self

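    # A minimal sketch of how setIfMissing differs from set, with hypothetical
    # keys and values (doctest-style, matching this module's Python 2 output):
    #
    #   >>> conf = SparkConf(loadDefaults=False)
    #   >>> conf.set("spark.master", "local[2]").setIfMissing("spark.master", "local[4]")
    #   <pyspark.conf.SparkConf object at ...>
    #   >>> conf.get("spark.master")  # the value that was already set wins
    #   u'local[2]'
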
    def setMaster(self, value):
        """Set master URL to connect to."""
        self._jconf.setMaster(value)
        return self

    def setAppName(self, value):
        """Set application name."""
        self._jconf.setAppName(value)
        return self

    def setSparkHome(self, value):
        """Set path where Spark is installed on worker nodes."""
        self._jconf.setSparkHome(value)
        return self

    def setExecutorEnv(self, key=None, value=None, pairs=None):
        """Set an environment variable to be passed to executors."""
        if (key is not None and pairs is not None) or (key is None and pairs is None):
            raise Exception("Either pass one key-value pair or a list of pairs")
        elif key is not None:
            self._jconf.setExecutorEnv(key, value)
        elif pairs is not None:
            for (k, v) in pairs:
                self._jconf.setExecutorEnv(k, v)
        return self

    def setAll(self, pairs):
        """
        Set multiple parameters, passed as a list of key-value pairs.

        @param pairs: list of key-value pairs to set
        """
        for (k, v) in pairs:
            self._jconf.set(k, v)
        return self

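    # A short sketch of setAll, using standard spark.* key names with
    # hypothetical values:
    #
    #   >>> conf = SparkConf(loadDefaults=False)
    #   >>> conf.setAll([("spark.executor.memory", "1g"), ("spark.cores.max", "4")])
    #   <pyspark.conf.SparkConf object at ...>
    #   >>> sorted(conf.getAll())
    #   [(u'spark.cores.max', u'4'), (u'spark.executor.memory', u'1g')]
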
    def get(self, key, defaultValue=None):
        """Get the configured value for some key, or return a default otherwise."""
        if defaultValue is None:   # Py4J doesn't call the right get() if we pass None
            if not self._jconf.contains(key):
                return None
            return self._jconf.get(key)
        else:
            return self._jconf.get(key, defaultValue)

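    # A sketch of get with and without a default, for a hypothetical unset key:
    #
    #   >>> conf = SparkConf(loadDefaults=False)
    #   >>> conf.get("spark.unknown.key") is None
    #   True
    #   >>> conf.get("spark.unknown.key", "fallback")
    #   u'fallback'
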
    def getAll(self):
        """Get all values as a list of key-value pairs."""
        pairs = []
        for elem in self._jconf.getAll():
            pairs.append((elem._1(), elem._2()))
        return pairs

    def contains(self, key):
        """Does this configuration contain a given key?"""
        return self._jconf.contains(key)

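    # contains is a simple membership test (hypothetical keys):
    #
    #   >>> conf = SparkConf(loadDefaults=False)
    #   >>> conf.set("spark.app.name", "demo").contains("spark.app.name")
    #   True
    #   >>> conf.contains("spark.unknown.key")
    #   False
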
    def toDebugString(self):
        """
        Returns a printable version of the configuration, as a list of
        key=value pairs, one per line.
        """
        return self._jconf.toDebugString()

def _test():
    import doctest
    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
    if failure_count:
        exit(-1)


if __name__ == "__main__":
    _test()
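
# The doctests in the module docstring can be run directly, assuming a local
# Spark checkout with pyspark and py4j on the Python path (the SparkContext
# doctest needs a working Spark installation):
#
#   python pyspark/conf.py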