싸랑하는친구여

import os from impala.dbapi import connect import pandas as pd import numpy as np def db_close(cur, conn): cur.close() conn.close() print(os.getcwd()) conn = connect(host='192.168.91.156', port=21050) cursor = conn.cursor() # fetch schema list df = pd.read_sql("SHOW SCHEMAS", conn) # DF TO LIST dbList = [] # 2차원 배열 형태가 된다. # dbList = df.values.tolist() for i in range(len(df)): dbList.append(df.loc[i, "name"]) # print(len(dbList)) # print(dbList) tabledf = pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False) for i in range(len(dbList)): dbname = dbList[i] # print(dbname ) df = pd.read_sql("SHOW TABLES IN " + dbname, conn) # 리스트 타입이 아닌경우 에러발생 # df.loc[:,'owner'] = 'aaa' tmpOwnerList = [] for j in range(len(df)): tmpOwnerList.append(dbname) df.loc[:, "OWNER"] = tmpOwnerList # 컬럼 순서 변경 df = df[["OWNER", "name"]] # 컬럼이름 바꾸기 df = df.rename(columns={'name': 'TAB_ID'}) tabledf = tabledf.append(df, ignore_index=True) # 합성컬럼 생성 tabledf["ID"] = tabledf["OWNER"] + "." + tabledf["TAB_ID"] # 데이터 프레임에서 값만을 추출 test = tabledf.loc[5,"ID"] # print( "1번인덱스 ", type(test) ) # 데이터 프레임 구조로 행열에 해당하는 값 추출 test = tabledf.loc[[5],["ID"]] # print( "1번인덱스 ", type(test) ) colList = [] for i in range(len(tabledf)): id = tabledf.loc[i, "ID"] tableOwner = tabledf.loc[i, "OWNER"] tableId = tabledf.loc[i, "TAB_ID"] # print("key : ",key) # print("tableOwner : ",tableOwner) # print("tableId : ",tableId) df2 = pd.read_sql("DESCRIBE "+ id, conn) # 동일값으로 리스트 생성 idList = [id for i in range(len(df2))] tableOwnerList = [tableOwner for i in range(len(df2))] tableIdList = [tableId for i in range(len(df2))] df2.loc[:, "ID"] = idList df2.loc[:, "OWNER"] = tableOwnerList df2.loc[:, "TAB_ID"] = tableIdList df2.loc[:, "COL_NO"] = df2.index + 1 colList.append(df2) db_close(cursor, conn) # 데이터프레임병합 및 인덱스 재배열 coldf = pd.concat(colList, ignore_index=True) # 데이터프레임 전체 열 이름 조회 # ['name', 'type', 'comment', 'primary_key', 'nullable', 'default_value', # 'encoding', 'compression', 'block_size', 'ID', 'OWNER', 'TAB_ID', # 'COL_NO'] # print(coldf.columns) # coldf.head() # coldf.shape # print(coldf.head()) # coldf.reset_index(drop=True) for i in range(len(coldf)): # name = coldf.loc[i, "name"] # print(i," 번째 test name :", name) # id = coldf.loc[i, "ID"] # print(i," 번째 test id :", id) typeStr = coldf.loc[ i , "type"] # print(gubun) # 컬럼 생성하여 컬럼 타입과 크기 분리 if typeStr.find("(") > 0: start_idx = typeStr.index("(") end_idx = typeStr.index(")") width = typeStr[start_idx + 1:end_idx] type = typeStr[0:start_idx] coldf.loc[i, "WIDTH"] = width coldf.loc[i, "type"] = type else: coldf.loc[i, "WIDTH"] = "" # nullable 값 변환 if typeStr.find("true") > 0: coldf.loc[i, "nullable"] = "NOT NULL" else: coldf.loc[i, "nullable"] = "" # i+1번째 열 이름 선택 # coldf.columns[i] # print( coldf.head() ) # # # 컬럼이름 바꾸기 coldf = coldf.rename(columns={'name': 'COL_ID', 'type': 'TYPE', 'comment': 'Comments', "primary_key":"PK", "nullable":"NULLS" }) # # 컬럼 순서 변경 coldf = coldf[["ID", "OWNER", "TAB_ID", "COL_NO", "COL_ID", "Comments", "TYPE","WIDTH", "NULLS","default_value" ]] # # # 엑셀로 저장 coldf.to_excel(excel_writer='Z:/20.Dev/01.Python/02.download/columnList.xlsx')