(py389) [scott@centos archive]$ spark-shell
scala>
// Create the table that backs Case.csv: comma-delimited text, one record per line.
// `group` is back-quoted because GROUP is an SQL keyword (reserved in Spark's ANSI
// mode and in Hive); the column is still named "group".
sqlContext.sql("""
  create table IF NOT EXISTS case_1(
    case_id int,
    province string,
    city string,
    `group` string,
    infection_case string,
    confirmed int,
    latitude float,
    longitude float
  )
  ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  LINES TERMINATED BY '\n'
""")
scala>
// Load the local CSV into case_1. OVERWRITE replaces the table's current rows;
// plain INTO appends, so re-running this session would load the file a second
// time and double every sum(confirmed) computed afterwards.
sqlContext.sql("LOAD DATA LOCAL INPATH '/home/scott/archive/Case.csv' OVERWRITE INTO TABLE case_1")
scala>
// Total confirmed cases per province.
val confirmedByProvince = sql("""select province, sum(confirmed) from case_1 group by province""")
// Collapse to a single partition before writing the result.
val singlePartition = confirmedByProvince.coalesce(1)
// Write as comma-separated CSV without a header row, replacing any previous output.
singlePartition.write.
  option("header", "False").
  option("sep", ",").
  mode("overwrite").
  csv("/home/scott/dd3")
putty_Python>
import glob

import pandas as pd

# Spark's .csv("/home/scott/dd3") writes a directory of part-*.csv files, not a
# single named file, and mode("overwrite") wipes that directory on every run.
# Pick up the part file directly; fall back to the hand-renamed "sample01" copy
# so an existing manual workflow keeps working.
candidates = sorted(glob.glob("/home/scott/dd3/part-*.csv")) or ["/home/scott/dd3/sample01"]

# The export has no header row, so columns are addressed by position:
# 0 = province, 1 = sum(confirmed).
case = pd.read_csv(candidates[0], header=None)

# Series of summed confirmed counts indexed by province, drawn as a gray bar chart.
res = case.set_index(0)[1]
res.plot(kind='bar', color='gray')
↓
[graph]