# %% - - visual studio code cell - -
# pip install langchain-text-splitters
from langchain_text_splitters import RecursiveCharacterTextSplitter


def _split_file(path, chunk_size, chunk_overlap):
    """Read a UTF-8 text file and split it into overlapping character chunks.

    Dedupes the read-then-split boilerplate that was repeated per source file.
    Returns a list of chunk strings.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    with open(path, encoding='utf-8') as f:
        return splitter.split_text(f.read())


text1 = _split_file('wiki_game_awards_2025.txt', chunk_size=500, chunk_overlap=100)

# %% - - visual studio code cell - -
text2 = _split_file('wiki_98th_oscars.txt', chunk_size=1000, chunk_overlap=200)

# %% - - visual studio code cell - -
# This file separates events with an explicit delimiter line, so split on it
# directly instead of using the recursive splitter.
with open('chiirl_events.txt', encoding='utf-8') as f:
    doc = f.read()
text3 = doc.split('____________________')

# %% - - visual studio code cell - -
# pip install langchain-huggingface
# pip install sentence_transformers
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# %% - - visual studio code cell - -
# pip install langchain-community
# pip install faiss-cpu
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway query once just to discover the embedding dimensionality
# needed to size the FAISS index.
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# %% - - visual studio code cell - -
# Smoke-test the store: add one document, fetch it back by id, run a search.
from langchain_core.documents import Document

document = Document(page_content='testing document')
vector_store.add_documents(documents=[document], ids=['test'])
print(vector_store.get_by_ids(['test']))
results = vector_store.similarity_search_with_score(
    'Will it be hot tomorrow?', k=1
)
print('score = ', results[0][1])

# %% - - visual studio code cell - -
# Confirm the FAISS IndexFlatL2 score equals the SQUARED euclidean distance
# between the two embeddings, then remove the test document.
import math

t1 = embeddings.embed_query('testing document')
t2 = embeddings.embed_query('Will it be hot tomorrow?')
print('euclidean dist = ', math.dist(t1, t2) ** 2)
vector_store.delete(ids=['test'])

# %% - -
# %% - - visual studio code cell - -
# add the 3 documents to the vector db
doc_list = [
    Document(page_content=chunk) for chunk in text1 + text2 + text3
]
keys = vector_store.add_documents(documents=doc_list)

# %% - - visual studio code cell - -
# pip install google-genai
import os

from google import genai

# Read the API key from the environment rather than hard-coding a secret
# literal in source (the original shipped a placeholder string here).
client = genai.Client(api_key=os.environ['GEMINI_API_KEY'])

# Baseline: ask the model with no retrieved context.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='In one sentence, who performed at the 2025 game awards?'
)
print('* * * no RAG * * *')
print('\n')
print(response.text)
print('\n')

# - - -
# RAG: retrieve the 5 closest chunks and prepend them to the same question.
prompt = 'In one sentence, who performed at the 2025 game awards?'
rag_string = ''
x = vector_store.similarity_search_with_score(prompt, k=5)
for doc in x:
    # each result is a (Document, score) tuple; keep only the text
    y = doc[0].model_dump()['page_content']
    rag_string += y
    rag_string += '\n\n'
new_prompt = f'''
using this information:
{rag_string}
answer this question:
{prompt}
'''
rag_response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=new_prompt
)
print('* * * with RAG * * *')
print('\n')
print(rag_response.text)
print('\n')

# %% - - visual studio code cell - -
# Second question (Oscars): baseline answer first; the retrieval half of this
# cell follows below.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='In one sentence, who announced the nominees for the 98th Academy Awards?'
)
print('* * * no RAG * * *')
print('\n')
print(response.text)
print('\n')

# - - -
prompt = 'In one sentence, who announced the nominees for the 98th Academy Awards?'
# RAG half of the Oscars question: `prompt` was set in the previous cell.
hits = vector_store.similarity_search_with_score(prompt, k=5)
# Each hit is a (Document, score) pair; concatenate the chunk texts,
# separated by blank lines, to form the grounding context.
rag_string = ''.join(
    hit.model_dump()['page_content'] + '\n\n' for hit, _score in hits
)
new_prompt = f'''
using this information:
{rag_string}
answer this question:
{prompt}
'''
rag_response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=new_prompt
)
print('* * * with RAG * * *')
print('\n')
print(rag_response.text)
print('\n')

# %% - - visual studio code cell - -
# Third question (Chicago meetups): baseline first, then grounded.
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='Give me a chicago tech meetup event happening in Dec 2025 in one sentence.'
)
print('* * * no RAG * * *')
print('\n')
print(response.text)
print('\n')

# - - -
prompt = 'Give me a chicago tech meetup event happening in Dec 2025 in one sentence.'
hits = vector_store.similarity_search_with_score(prompt, k=5)
rag_string = ''.join(
    hit.model_dump()['page_content'] + '\n\n' for hit, _score in hits
)
new_prompt = f'''
using this information:
{rag_string}
answer this question:
{prompt}
'''
rag_response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=new_prompt
)
print('* * * with RAG * * *')
print('\n')
print(rag_response.text)
print('\n')

# %% - - visual studio code cell - -

# %% - - visual studio code cell - -