"""
Minimal PySpark demo: create a DataFrame from Rows and display it.

Local-run notes: make sure the SPARK_HOME environment variable is set and
Hadoop is started, otherwise you get a "localhost:9000 connection refused"
error. A local standalone Spark cluster does NOT need to be started.
"""
from pyspark.sql import SparkSession
from pyspark.sql import Row
from datetime import datetime, date

# Reuse an existing session or create a new one (getOrCreate is idempotent).
spark = SparkSession.builder.getOrCreate()

# Build a small demo DataFrame; the schema is inferred from the Row fields
# (a -> long, b -> double, c -> string, d -> date, e -> timestamp).
df = spark.createDataFrame([
    Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)),
    Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)),
    Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0)),
])

# Print the first rows of the DataFrame to stdout.
df.show()