用户定义的 Python 累加器
定义 AccumulatorParam 的子类,并用它创建字符串累加器:
from pyspark import AccumulatorParam
class StringAccumulator(AccumulatorParam):
    """Accumulator parameter that merges string values by concatenation."""

    def zero(self, s):
        """Return the neutral starting value for string accumulation.

        Args:
            s: The accumulator's initial value. It is not echoed back; only
                the fact that the accumulator holds strings matters here.

        Returns:
            The empty string, so that ``addInPlace(zero(s), t) == t``.
        """
        # A zero value must be neutral under addInPlace. Returning ``s``
        # itself would fold a non-empty initial value into the result again
        # wherever a partial result is started from zero(s).
        return ""

    def addInPlace(self, s1, s2):
        """Combine two string values into one.

        Args:
            s1: The value accumulated so far.
            s2: The value to merge into it.

        Returns:
            ``s1`` followed by ``s2``.
        """
        return s1 + s2
# Shared accumulator that starts from the empty string and merges updates
# with StringAccumulator. `sc` is assumed to be an already-created
# SparkContext (e.g. the one the PySpark shell provides) -- TODO confirm.
accumulator = sc.accumulator(value="", accum_param=StringAccumulator())
def add(x):
    """Add ``x`` to the module-level ``accumulator``.

    Args:
        x: The value to merge into the accumulator.
    """
    # Accumulator.add is the operation that ``+=`` performs, so calling it
    # directly avoids having to rebind the global name.
    accumulator.add(x)
# Distribute three one-character strings and pass each element to `add`.
# NOTE(review): element processing order is presumably not guaranteed, so
# the concatenation order in the accumulator may vary -- confirm.
letters = sc.parallelize(["a", "b", "c"])
letters.foreach(add)