"""Bootstrap script: configure interpreter/JDK paths and create a SparkSession
for the behavior-log preprocessing job."""
import os

# Paths to the Python interpreter and the JDK used by the Spark driver/executors.
PYSPARK_PYTHON = "/root/miniconda3/bin/python3"
JAVA_HOME = "/root/bigdata/jdk1.8.0_181"

# These environment variables must be set BEFORE pyspark is imported/started,
# so that the driver and the workers all run the same interpreter and JVM.
os.environ["PYSPARK_PYTHON"] = PYSPARK_PYTHON
os.environ["PYSPARK_DRIVER_PYTHON"] = PYSPARK_PYTHON
os.environ["JAVA_HOME"] = JAVA_HOME

from pyspark import SparkConf
from pyspark.sql import SparkSession

SPARK_APP_NAME = "preprocessingBehaviorLog"
SPARK_URL = "spark://192.168.199.126:7077"

conf = SparkConf()  # Spark configuration object
config = (
    ("spark.app.name", SPARK_APP_NAME),  # app name shown in the Spark UI; a random one is generated if omitted
    ("spark.executor.memory", "2g"),     # memory per executor (default 1g)
    ("spark.master", SPARK_URL),         # address of the Spark master
    ("spark.executor.cores", "2"),       # CPU cores used per executor
    # NOTE(review): the original comment said "the following three settings can
    # control the number of executors", but those settings are not present here.
)
conf.setAll(config)

spark = SparkSession.builder.config(conf=conf).getOrCreate()