
Source Code for Module pyspark.conf

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
>>> from pyspark.conf import SparkConf
>>> from pyspark.context import SparkContext
>>> conf = SparkConf()
>>> conf.setMaster("local").setAppName("My app")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.master")
u'local'
>>> conf.get("spark.app.name")
u'My app'
>>> sc = SparkContext(conf=conf)
>>> sc.master
u'local'
>>> sc.appName
u'My app'
>>> sc.sparkHome == None
True

>>> conf = SparkConf()
>>> conf.setSparkHome("/path")
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.home")
u'/path'
>>> conf.setExecutorEnv("VAR1", "value1")
<pyspark.conf.SparkConf object at ...>
>>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")])
<pyspark.conf.SparkConf object at ...>
>>> conf.get("spark.executorEnv.VAR1")
u'value1'
>>> print conf.toDebugString()
spark.executorEnv.VAR1=value1
spark.executorEnv.VAR3=value3
spark.executorEnv.VAR4=value4
spark.home=/path
>>> sorted(conf.getAll(), key=lambda p: p[0])
[(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), (u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
"""

class SparkConf(object):
    """
    Configuration for a Spark application. Used to set various Spark
    parameters as key-value pairs.

    Most of the time, you would create a SparkConf object with
    C{SparkConf()}, which will load values from C{spark.*} Java system
    properties as well. In this case, any parameters you set directly on
    the C{SparkConf} object take priority over system properties.

    For unit tests, you can also call C{SparkConf(false)} to skip
    loading external settings and get the same configuration no matter
    what the system properties are.

    All setter methods in this class support chaining. For example,
    you can write C{conf.setMaster("local").setAppName("My app")}.

    Note that once a SparkConf object is passed to Spark, it is cloned
    and can no longer be modified by the user.
    """

    def __init__(self, loadDefaults=True, _jvm=None):
        """
        Create a new Spark configuration.

        @param loadDefaults: whether to load values from Java system
                             properties (True by default)
        @param _jvm: internal parameter used to pass a handle to the
                     Java VM; does not need to be set by users
        """
        from pyspark.context import SparkContext
        SparkContext._ensure_initialized()
        _jvm = _jvm or SparkContext._jvm
        self._jconf = _jvm.SparkConf(loadDefaults)

    def set(self, key, value):
        """Set a configuration property."""
        self._jconf.set(key, unicode(value))
        return self

    def setMaster(self, value):
        """Set master URL to connect to."""
        self._jconf.setMaster(value)
        return self

    def setAppName(self, value):
        """Set application name."""
        self._jconf.setAppName(value)
        return self

    def setSparkHome(self, value):
        """Set path where Spark is installed on worker nodes."""
        self._jconf.setSparkHome(value)
        return self

    def setExecutorEnv(self, key=None, value=None, pairs=None):
        """Set an environment variable to be passed to executors."""
        if (key != None and pairs != None) or (key == None and pairs == None):
            raise Exception("Either pass one key-value pair or a list of pairs")
        elif key != None:
            self._jconf.setExecutorEnv(key, value)
        elif pairs != None:
            for (k, v) in pairs:
                self._jconf.setExecutorEnv(k, v)
        return self

    def setAll(self, pairs):
        """
        Set multiple parameters, passed as a list of key-value pairs.

        @param pairs: list of key-value pairs to set
        """
        for (k, v) in pairs:
            self._jconf.set(k, v)
        return self

    def get(self, key, defaultValue=None):
        """Get the configured value for some key, or return a default otherwise."""
        if defaultValue == None:   # Py4J doesn't call the right get() if we pass None
            if not self._jconf.contains(key):
                return None
            return self._jconf.get(key)
        else:
            return self._jconf.get(key, defaultValue)

    def getAll(self):
        """Get all values as a list of key-value pairs."""
        pairs = []
        for elem in self._jconf.getAll():
            pairs.append((elem._1(), elem._2()))
        return pairs

    def contains(self, key):
        """Does this configuration contain a given key?"""
        return self._jconf.contains(key)

    def toDebugString(self):
        """
        Returns a printable version of the configuration, as a list of
        key=value pairs, one per line.
        """
        return self._jconf.toDebugString()


def _test():
    import doctest
    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
    if failure_count:
        exit(-1)


if __name__ == "__main__":
    _test()
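Usage sketch: the class docstring above describes how setters chain and how a SparkConf is cloned once handed to Spark. The snippet below is a minimal illustration of that, assuming pyspark is importable and a local Spark runtime is available; the app name, master URL, and configuration values are illustrative only, and print statements follow the Python 2 syntax used in the doctest above.

# Minimal usage sketch for the SparkConf API listed above (assumptions noted in the lead-in).
from pyspark.conf import SparkConf
from pyspark.context import SparkContext

# Setter methods return self, so the whole configuration can be built in one expression.
conf = (SparkConf()
        .setMaster("local")
        .setAppName("Example app")
        .setExecutorEnv("VAR1", "value1")
        .set("spark.executor.memory", "512m"))   # illustrative value

print conf.toDebugString()       # one key=value pair per line

sc = SparkContext(conf=conf)     # from here on the conf is cloned and can no longer be modified
try:
    print sc.master              # u'local'
    print sc.appName             # u'Example app'
finally:
    sc.stop()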