Create a private ChatGPT with your own documents

Scenario : As we are all aware, ChatGPT only provides public data available up to September 2021. We have a requirement to use our own private data together with the capabilities of ChatGPT and language models.

Software Required:

# Python : [ https://www.python.org/downloads/]

# Visual Studio Code : [https://code.visualstudio.com/]

# ChatGPT (OpenAI) API Key : [https://platform.openai.com/account/api-keys] Make sure to have a paid subscription to avoid rate-limiting issues.

Steps :

Step 1: Install Python

Step 2 : Install Visual Studio Code

Step 3 : Install a Python interpreter for Visual Studio Code. Refer to [ https://code.visualstudio.com/docs/python/python-tutorial ]

Step 4 : Create a python environment :

python -m venv ChatGPT

Step 5: Activate the environment you created
.\ChatGPT\Scripts\Activate.ps1

Step 6 : Install Python Packages required

pip install streamlit
pip install python-dotenv
# pickle is part of the Python standard library, so it does not need to be installed with pip
pip install PyPDF2
pip install streamlit-extras
pip install langchain
pip install Python-IO

pip install openai

pip install tiktoken

Step 7 : Create a .env file and insert the API key

OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxx

# Create a folder named data in the project folder (next to the script)

# Copy the logo.png file into the project folder, next to the script (not into the data folder)

Step 8 : Save the code below as index.py and run it with: streamlit run index.py

import streamlit as st
from dotenv import load_dotenv
from io import StringIO
import pickle
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras.app_logo import add_logo
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
import os

# Activate the virtual environment first (PowerShell):
#   .\chatgpt\Scripts\Activate.ps1
# Then start the app with:
#   streamlit run index.py

# Load the .env file created during setup (it holds OPENAI_API_KEY).
load_dotenv()

# CSS rules that hide the elements matched by #MainMenu and footer.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
# unsafe_allow_html=True lets the raw <style> block through instead of
# having it shown as text.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Sidebar content: logo image (read from the working directory), title and
# a short credit note.
with st.sidebar:
    st.image("logo.png")
    st.title('Private ChatBOT')
    st.markdown('''
    ## ChatBOT
    This app built by:
    - [Haneef Puttur](https://haneefputtur.com/)
    
  
    ''')
def main():
    """Render the agent selector and dispatch to the chosen view.

    Every ``*.pkl`` file in the ``data`` folder is offered as a chat agent,
    named after the file without its extension, followed by a final
    ``upload`` entry. Choosing ``upload`` shows the upload form; any other
    choice opens the chat for that stored agent.
    """
    # Create the store folder on first run instead of crashing in listdir.
    os.makedirs('data', exist_ok=True)

    # Only pickled stores are agents: any other file in the folder would be
    # listed as an agent that cannot be loaded. splitext removes just the
    # trailing extension, so a ".pkl" elsewhere in the name is left intact.
    # Sorting keeps the option order stable between runs.
    agents = sorted(
        stem
        for stem, ext in map(os.path.splitext, os.listdir('data'))
        if ext == ".pkl"
    )
    agents.append('upload')

    option = st.selectbox('Select Chat Agent?', agents)

    if option == 'upload':
        uploadpdf()
    else:
        startchat(option)


def startchat(filename):
    """Show a question box for a stored agent and answer from its store.

    Args:
        filename: Agent name, i.e. the store's file name inside ``data``
            without the ``.pkl`` extension.

    The pickled vector store is loaded, the three most similar chunks for
    the entered question are retrieved, and a "stuff" question-answering
    chain produces the reply that is written to the page.
    """
    st.header("Chat with "+filename)

    store_path = f"data/{filename}.pkl"

    if not os.path.exists(store_path):
        st.write('Embeddings Does not Exist')
        # Stop here: without a store there is nothing to search. Continuing
        # used to fail on an unbound VectorStore once a question was typed.
        return

    # SECURITY: unpickling can execute arbitrary code. Only place files you
    # created yourself (via the upload view) into the data folder.
    with open(store_path, "rb") as f:
        VectorStore = pickle.load(f)

    query = st.text_input("Ask questions about UD Staff handbook")
    if not query:
        return

    docs = VectorStore.similarity_search(query=query, k=3)

    llm = OpenAI()
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=query)
        # Goes to the server console, not to the page.
        print(cb)
    st.write(response)

def uploadpdf():
    """Upload a PDF or text file, embed it and save it as a new agent.

    The store is written to ``data/<name>.pkl`` where ``<name>`` is the
    uploaded file's name without extension and with spaces replaced by
    underscores. If that store already exists no new embeddings are
    requested. After a new store has been saved the agent selector is
    rendered again so the new agent can be picked.
    """
    pdf = st.file_uploader("Upload your pdf or txt files", type=['pdf', 'txt'] )
    if pdf is None:
        return

    # splitext yields the real extension regardless of its length; basename
    # keeps any path component in the uploaded name out of the store path.
    stem, ext = os.path.splitext(os.path.basename(pdf.name))
    store_name = ("data/" + stem).replace(" ", "_")
    st.write(f'{store_name}')
    store_path = f"{store_name}.pkl"

    if os.path.exists(store_path):
        # SECURITY: unpickling can execute arbitrary code. Only keep files
        # created by this app in the data folder.
        with open(store_path, "rb") as f:
            pickle.load(f)  # confirms the existing store is readable
        st.write('Embeddings Loaded from the Disk')
        # Do not call main() here: the agent list is unchanged, so it would
        # render an identical selectbox a second time in the same run, which
        # Streamlit reports as a duplicate widget.
        return

    if ext.lower() == ".pdf":
        pdf_reader = PdfReader(pdf)
        # A page without extractable text contributes an empty string.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    else:
        text = pdf.getvalue().decode("utf-8")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
        )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Nothing to embed (empty file or a PDF without extractable text).
        st.write('No text could be extracted from the uploaded file')
        return

    embeddings = OpenAIEmbeddings()
    VectorStore = FAISS.from_texts(chunks, embedding=embeddings)

    os.makedirs("data", exist_ok=True)
    with open(store_path, "wb") as f:
        pickle.dump(VectorStore, f)
    st.write('Embeddings Saved to the Disk')

    # The agent list has grown, so show the selector again with the new entry.
    main()

# Script entry point: build the page when this file is run directly
# (for example through `streamlit run index.py`).
if __name__ == "__main__":
    main()