import streamlit as st
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Extract text from PDF documents
def get_pdf_text(pdf_docs):
    """Return the text of every page of every given PDF as one string.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to
            ``PdfReader``. ``None`` is treated like an empty iterable.

    Returns:
        The page texts concatenated in document/page order with no
        separator between them; ``""`` when there is nothing to read.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Treat a falsy extraction result (None or "") as "no text" so
            # that a page without extractable text cannot raise TypeError.
            page_texts.append(page.extract_text() or "")
    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(page_texts)
# Split the text into chunks
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks.

    The splitter is configured to break on newlines only, with a chunk size
    of 1000 and an overlap of 200, both measured with ``len``.

    Args:
        text: The string to split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` with the settings above.
    """
    splitter_options = {
        "separators": ["\n"],
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
# Embed the chunks and build the vector store
def get_vectorstore(text_chunks):
    """Create a FAISS vector store from ``text_chunks``.

    Embeddings are produced by ``HuggingFaceEmbeddings`` using the
    ``sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`` model.

    Args:
        text_chunks: The texts handed to ``FAISS.from_texts``.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embedding_model)
# Build the chatbot conversation chain
def get_conversation_chain(vectorstore):
    """Assemble a ``ConversationalRetrievalChain`` over ``vectorstore``.

    The chain is built from a ``ChatOllama`` model (``llama3-ko``,
    temperature 0), the store's default retriever, and a
    ``ConversationBufferWindowMemory`` stored under the ``chat_history`` key.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain returned by ``ConversationalRetrievalChain.from_llm``.
    """
    window_memory = ConversationBufferWindowMemory(
        return_messages=True,
        memory_key='chat_history',
    )
    chain_kwargs = dict(
        llm=ChatOllama(model="llama3-ko", temperature=0),
        retriever=vectorstore.as_retriever(),
        memory=window_memory,
        # Identity function: the history is passed through unmodified
        # (presumably instead of the chain's default formatting - confirm).
        get_chat_history=lambda history: history,
    )
    return ConversationalRetrievalChain.from_llm(**chain_kwargs)
# Streamlit interface: title, file upload, and index construction
st.title("📄 PDF 기반 챗봇 (llama3-ko)")
st.divider()
# Only offer PDFs in the picker: every upload is handed to PdfReader.
user_uploads = st.file_uploader(
    "파일을 업로드해주세요~",
    type=["pdf"],
    accept_multiple_files=True,
)
if st.button("Upload"):
    # With accept_multiple_files=True the uploader yields a list that is
    # empty (not None) when nothing is selected, so test truthiness rather
    # than `is not None`.
    if not user_uploads:
        st.warning("PDF 파일을 먼저 선택해주세요~")
    else:
        with st.spinner("처리중.."):
            raw_text = get_pdf_text(user_uploads)
            text_chunks = get_text_chunks(raw_text)
            if not text_chunks:
                # No extractable text (e.g. image-only PDFs): do not try to
                # build a vector store from zero chunks, which is expected
                # to fail inside FAISS.from_texts.
                st.error("문서에서 텍스트를 추출하지 못했습니다.")
            else:
                vectorstore = get_vectorstore(text_chunks)
                # Keep the chain in session state so it survives reruns.
                st.session_state.conversation = get_conversation_chain(vectorstore)
# Chat input: answer with the conversation chain once documents are indexed
user_query = st.chat_input("질문을 입력해주세요~")
if user_query:
    if 'conversation' not in st.session_state:
        # No chain has been stored yet, i.e. nothing was uploaded/indexed.
        response = "먼저 문서를 업로드해주세요~."
    else:
        chain = st.session_state.conversation
        # 'chat_history' is never written to session state in this part of
        # the script, so this falls back to []; the chain's own memory
        # presumably supplies the real history - confirm.
        payload = {
            "question": user_query,
            "chat_history": st.session_state.get('chat_history', []),
        }
        response = chain(payload)["answer"]
    with st.chat_message("assistant"):
        st.write(response)