Scenario: As we are all aware, ChatGPT only has access to public data available up to September 2021. We have a requirement to use our own private data together with the capabilities of ChatGPT and language models.
Software Required:
# Python : [ https://www.python.org/downloads/]
# Visual Studio Code : [https://code.visualstudio.com/]
# ChatGPT API Key : [https://platform.openai.com/account/api-keys] Make sure you have a paid subscription to avoid rate-limiting issues.
Steps :
Step 1: Install Python
Step 2 : Install Visual Studio Code
Step 3 : Install a Python interpreter for Visual Studio Code. Refer to [ https://code.visualstudio.com/docs/python/python-tutorial ]
Step 4 : Create a python environment :
python -m venv ChatGPT
Step 5: Activate the environment you created
.\ChatGPT\Scripts\Activate.ps1
Step 6 : Install Python Packages required
pip install streamlit
pip install python-dotenv
# pickle is part of the Python standard library; it does not need to be installed with pip
pip install PyPDF2
pip install streamlit-extras
pip install langchain
# io (StringIO) is part of the Python standard library; it does not need to be installed with pip
pip install openai
pip install tiktoken
pip install faiss-cpu
Step 7 : Create .env file and insert the api key
OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxx
# Create a folder named data in the same folder as the script (index.py)
# Copy the logo.png file to the same folder as the script (next to index.py, not inside data)
Step 8 : Run the code. Save the script below as index.py, then start it with: streamlit run index.py
import streamlit as st
from dotenv import load_dotenv
from io import StringIO
import pickle
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras.app_logo import add_logo
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
import os
# Activate the virtual environment first:  .\chatgpt\Scripts\Activate.ps1
# Launch the app with:                     streamlit run index.py

# Read settings such as OPENAI_API_KEY from the local .env file.
load_dotenv()

# CSS injected into the page to hide the #MainMenu element and the footer.
hide_streamlit_style = (
    "\n"
    "<style>\n"
    "#MainMenu {visibility: hidden;}\n"
    "footer {visibility: hidden;}\n"
    "</style>\n"
)
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Sidebar branding: logo, title and credits, added through st.sidebar's
# object notation.
st.sidebar.image("logo.png")
st.sidebar.title('Private ChatBOT')
st.sidebar.markdown(
    "\n"
    "## ChatBOT\n"
    "This app built by:\n"
    "- [Haneef Puttur](https://haneefputtur.com/)\n"
)
def main():
    """Render the chat-agent picker and dispatch to the chosen action.

    Every ``*.pkl`` file found in the ``data`` folder is offered as a chat
    agent (shown without its extension), followed by an ``upload`` entry.
    Selecting ``upload`` opens the upload form; any other selection starts a
    chat against that stored vector store.
    """
    # Create the folder on first run so os.listdir() cannot fail.
    os.makedirs('data', exist_ok=True)

    # Only pickled vector stores are valid agents. Slice off just the trailing
    # extension: str.replace would also remove ".pkl" from inside a name.
    suffix = ".pkl"
    agents = sorted(
        entry[:-len(suffix)]
        for entry in os.listdir('data')
        if entry.endswith(suffix)
    )
    agents.append('upload')

    option = st.selectbox('Select Chat Agent?', agents)
    if option == 'upload':
        uploadpdf()
    else:
        startchat(option)
def startchat(filename):
    """Answer user questions against the stored embeddings for ``filename``.

    Args:
        filename: Name of the vector store inside ``data``, without the
            ``.pkl`` extension.

    Loads the pickled vector store, asks the user for a question, retrieves
    the three most similar chunks and writes the model's answer to the page.
    If the store file is missing, a message is shown and nothing else runs.
    """
    st.header("Chat with "+filename)
    store_name = "data/"+filename
    store_path = f"{store_name}.pkl"

    # Stop here when there is nothing to query; otherwise VectorStore would be
    # unbound and the first question would raise NameError.
    if not os.path.exists(store_path):
        st.write('Embeddings Does not Exist')
        return

    # NOTE(security): pickle.load can execute arbitrary code. Only files
    # written by this app into the data folder should ever be loaded here.
    with open(store_path, "rb") as f:
        VectorStore = pickle.load(f)

    query = st.text_input("Ask questions about UD Staff handbook")
    if not query:
        return

    docs = VectorStore.similarity_search(query=query, k=3)
    llm = OpenAI()
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=query)
        # Print the callback object (collected during the run) to the console.
        print(cb)
    st.write(response)
def uploadpdf():
    """Upload a PDF or text file and build (or reload) its embeddings.

    The uploaded file's text is split into overlapping chunks. If a pickled
    vector store for the file already exists under ``data`` it is loaded;
    otherwise embeddings are computed, stored in a FAISS index and pickled to
    ``data/<file name without extension, spaces replaced by "_">.pkl``.
    """
    pdf = st.file_uploader("Upload your pdf or txt files", type=['pdf', 'txt'] )
    if pdf is None:
        return

    # Use the real file extension (case-insensitively) to pick the parser.
    # Slicing name[:-3] would return the name *without* its last three
    # characters, so the PDF branch would never be taken.
    base_name, extension = os.path.splitext(pdf.name)
    if extension.lower() == ".pdf":
        pdf_reader = PdfReader(pdf)
        # Guard against pages that yield no text so the join cannot fail.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    else:
        # Anything else accepted by the uploader is treated as UTF-8 text.
        text = pdf.getvalue().decode("utf-8")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text=text)

    store_name = ("data/" + base_name).replace(" ", "_")
    st.write(f'{store_name}')
    store_path = f"{store_name}.pkl"

    if os.path.exists(store_path):
        # NOTE(security): pickle.load can execute arbitrary code. Only files
        # written by this app into the data folder should ever be loaded here.
        with open(store_path, "rb") as f:
            VectorStore = pickle.load(f)
        st.write('Embeddings Loaded from the Disk')
        return

    # Nothing to embed (for example an empty file): do not call the
    # embeddings API or write an empty store.
    if not chunks:
        st.write('No text could be extracted from the uploaded file')
        return

    embeddings = OpenAIEmbeddings()
    VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
    # Make sure the target folder exists before writing the store.
    os.makedirs("data", exist_ok=True)
    with open(store_path, "wb") as f:
        pickle.dump(VectorStore, f)
    st.write('Embeddings Saved to the Disk')
# Streamlit runs this file as __main__, so one guarded call is enough.
# Calling main() a second time in the same run would create the identical
# selectbox twice, which Streamlit rejects as a duplicate widget.
if __name__ == '__main__':
    main()