Package pyspark :: Module broadcast
[frames | no frames]

Source Code for Module pyspark.broadcast

 1  """ 
 2  >>> from pyspark.context import SparkContext 
 3  >>> sc = SparkContext('local', 'test') 
 4  >>> b = sc.broadcast([1, 2, 3, 4, 5]) 
 5  >>> b.value 
 6  [1, 2, 3, 4, 5] 
 7   
 8  >>> from pyspark.broadcast import _broadcastRegistry 
 9  >>> _broadcastRegistry[b.bid] = b 
10  >>> from cPickle import dumps, loads 
11  >>> loads(dumps(b)).value 
12  [1, 2, 3, 4, 5] 
13   
14  >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect() 
15  [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] 
16   
17  >>> large_broadcast = sc.broadcast(list(range(10000))) 
18  """ 
19  # Holds broadcasted data received from Java, keyed by its id. 
20  _broadcastRegistry = {} 
21   
22   
def _from_id(bid):
    """
    Return the broadcast variable registered under ``bid``.

    This is the reconstructor that ``Broadcast.__reduce__`` hands to pickle:
    a pickled ``Broadcast`` carries only its id, and unpickling calls this
    function to fetch the matching entry from ``_broadcastRegistry``.

    Raises ``Exception`` if no entry with that id has been registered.
    """
    # Imported at call time rather than read from this module's own global.
    # NOTE(review): presumably so the lookup always uses the registry of the
    # canonical ``pyspark.broadcast`` module even if this file is loaded
    # under another name (e.g. as ``__main__``) -- confirm before removing.
    from pyspark.broadcast import _broadcastRegistry
    if bid not in _broadcastRegistry:
        # Wrap in a 1-tuple so a tuple-valued id cannot be mistaken for the
        # %-format argument list.
        raise Exception("Broadcast variable '%s' not loaded!" % (bid,))
    return _broadcastRegistry[bid]
28 29
class Broadcast(object):
    """
    A broadcast value identified by ``bid``.

    Pickling an instance serializes only its id: ``__reduce__`` names
    ``_from_id`` as the reconstructor, which returns the entry stored under
    that id in ``_broadcastRegistry`` when the pickle is loaded.
    """

    def __init__(self, bid, value, java_broadcast=None, pickle_registry=None):
        """
        Store the broadcast's id, value and bookkeeping handles.

        bid             -- id of this broadcast; the only thing written out
                           when the instance is pickled.
        value           -- the broadcast data, exposed as ``self.value``.
        java_broadcast  -- kept as ``self._jbroadcast`` and not used by this
                           class; presumably the Java-side broadcast
                           object -- confirm against the caller.
        pickle_registry -- optional collection with an ``add`` method; each
                           instance that gets pickled is added to it.
        """
        self.value = value
        self.bid = bid
        self._jbroadcast = java_broadcast
        self._pickle_registry = pickle_registry

    def __reduce__(self):
        """
        Pickle support: record this instance in the pickle registry (when
        one was supplied) and serialize it as a call to ``_from_id(bid)``.
        """
        # pickle_registry defaults to None; skip the bookkeeping in that case
        # instead of failing with an unrelated AttributeError.
        if self._pickle_registry is not None:
            self._pickle_registry.add(self)
        return (_from_id, (self.bid, ))
40