# %% - - visual studio code cell - -
# pip install langchain-text-splitters
from langchain_text_splitters import RecursiveCharacterTextSplitter


def _split_file(path, chunk_size, chunk_overlap):
    """Read a UTF-8 text file and split it into overlapping character chunks.

    Dedupes the read-then-split boilerplate that was repeated per source file.
    Returns a list of chunk strings.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    with open(path, encoding='utf-8') as f:
        return splitter.split_text(f.read())


text1 = _split_file('wiki_game_awards_2025.txt', chunk_size=500, chunk_overlap=100)

# %% - - visual studio code cell - -
text2 = _split_file('wiki_98th_oscars.txt', chunk_size=1000, chunk_overlap=200)

# %% - - visual studio code cell - -
# This file separates events with an explicit delimiter line, so split on it
# directly instead of using the recursive splitter.
with open('chiirl_events.txt', encoding='utf-8') as f:
    doc = f.read()
text3 = doc.split('____________________')

# %% - - visual studio code cell - -
# pip install langchain-huggingface
# pip install sentence_transformers
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# %% - - visual studio code cell - -
# pip install langchain-community
# pip install faiss-cpu
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway query once just to discover the embedding dimensionality
# needed to size the FAISS index.
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# %% - - visual studio code cell - -
# Smoke-test the store: add one document, fetch it back by id, run a search.
from langchain_core.documents import Document

document = Document(page_content='testing document')
vector_store.add_documents(documents=[document], ids=['test'])
print(vector_store.get_by_ids(['test']))
results = vector_store.similarity_search_with_score(
    'Will it be hot tomorrow?', k=1
)
print('score = ', results[0][1])

# %% - - visual studio code cell - -
# Confirm the FAISS IndexFlatL2 score equals the SQUARED euclidean distance
# between the two embeddings, then remove the test document.
import math

t1 = embeddings.embed_query('testing document')
t2 = embeddings.embed_query('Will it be hot tomorrow?')
print('euclidean dist = ', math.dist(t1, t2) ** 2)
vector_store.delete(ids=['test'])

# %% - -
# %% - - visual studio code cell - -
# add the 3 documents to the vector db
doc_list = [
    Document(page_content=chunk) for chunk in text1 + text2 + text3
]
keys = vector_store.add_documents(documents=doc_list)

# %% - - visual studio code cell - -
# pip install google-genai
import os

from google import genai

# Read the API key from the environment rather than hard-coding a secret
# literal in source (the original shipped a placeholder string here).
client = genai.Client(api_key=os.environ['GEMINI_API_KEY'])

# Baseline: ask the model with no retrieved context.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='In one sentence, who performed at the 2025 game awards?'
)
print('* * * no RAG * * *')
print('\n')
print(response.text)
print('\n')

# - - -
# RAG: retrieve the 5 closest chunks and prepend them to the same question.
prompt = 'In one sentence, who performed at the 2025 game awards?'
rag_string = ''
x = vector_store.similarity_search_with_score(prompt, k=5)
for doc in x:
    # each result is a (Document, score) tuple; keep only the text
    y = doc[0].model_dump()['page_content']
    rag_string += y
    rag_string += '\n\n'
new_prompt = f'''
using this information:
{rag_string}
answer this question:
{prompt}
'''
rag_response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=new_prompt
)
print('* * * with RAG * * *')
print('\n')
print(rag_response.text)
print('\n')

# %% - - visual studio code cell - -
# Second question (Oscars): baseline answer first; the retrieval half of this
# cell follows below.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='In one sentence, who announced the nominees for the 98th Academy Awards?'
)
print('* * * no RAG * * *')
print('\n')
print(response.text)
print('\n')

# - - -
prompt = 'In one sentence, who announced the nominees for the 98th Academy Awards?'
# RAG half of the Oscars question: `prompt` was set in the previous cell.
hits = vector_store.similarity_search_with_score(prompt, k=5)
# Each hit is a (Document, score) pair; concatenate the chunk texts,
# separated by blank lines, to form the grounding context.
rag_string = ''.join(
    hit.model_dump()['page_content'] + '\n\n' for hit, _score in hits
)
new_prompt = f'''
using this information:
{rag_string}
answer this question:
{prompt}
'''
rag_response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=new_prompt
)
print('* * * with RAG * * *')
print('\n')
print(rag_response.text)
print('\n')

# %% - - visual studio code cell - -
# Third question (Chicago meetups): baseline first, then grounded.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='Give me a chicago tech meetup event happening in Dec 2025 in one sentence.'
)
print('* * * no RAG * * *')
print('\n')
print(response.text)
print('\n')

# - - -
prompt = 'Give me a chicago tech meetup event happening in Dec 2025 in one sentence.'
hits = vector_store.similarity_search_with_score(prompt, k=5)
rag_string = ''.join(
    hit.model_dump()['page_content'] + '\n\n' for hit, _score in hits
)
new_prompt = f'''
using this information:
{rag_string}
answer this question:
{prompt}
'''
rag_response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=new_prompt
)
print('* * * with RAG * * *')
print('\n')
print(rag_response.text)
print('\n')

# %% - - visual studio code cell - -

# %% - - visual studio code cell - -