scott에서 시작
(py389) [scott@centos ~]$ cp Case.csv Case.txt
(py389) [scott@centos ~]$ spark-shell # 스칼라로 들어옴
scala> val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
scala> sql("drop table if exists case1") // 원활한 진행을 위해 테이블 드랍 먼저 시행
scala> sqlContext.sql( "CREATE TABLE IF NOT EXISTS case1(case_id int, province string, city string, `group` string, infection_case string, confirmed string, latitude string, longitude string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' ")
scala> sqlContext.sql("LOAD DATA LOCAL INPATH 'Case.txt' INTO TABLE case1")
scala> sql("""select province, sum(confirmed)
from case1
group by province""").coalesce(1).write.option("header","true").option("sep",",").mode("overwrite").csv("/home/scott/dd")
scala> :q // 스칼라 빠져나옴
(py389) [scott@centos ~]$ cd dd #dd 디렉토리로 들어감
(py389) [scott@centos dd]$ ls
part-r-00000-4bd2fd7b-f1f8-443a-b38f-bc1bf914ffe5.csv _SUCCESS
(py389) [scott@centos dd]$ mv part-r-00000-4bd2fd7b-f1f8-443a-b38f-bc1bf914ffe5.csv case1.csv #파일이름을 case1.csv로 변경
<spyder 창에서>
# Spyder snippet: load the per-province confirmed-case sums exported from
# Spark (renamed to case1.csv above) and draw a green bar chart.
import pandas as pd

# Read the aggregated CSV produced by the Spark job.
df = pd.read_csv("/home/scott/dd/case1.csv")

# Spark auto-names the aggregate column "sum(CAST(confirmed AS DOUBLE))";
# index the series by province so the bars are labeled.
counts = df['sum(CAST(confirmed AS DOUBLE))']
counts.index = df['province']
counts.plot(kind='bar', color='green')