OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

How to pass two or more values to a HyDE (Hypothetical Document Embedder) vector store retriever?

  • Thread starter: Dikshant Gupta
  • Start date
D

Dikshant Gupta

Guest
I am trying to create HyDE-based embeddings for a requirement and use them to retrieve the relevant documents. Those relevant documents will then be passed, together with the original question, as context to the main prompt. The prompt used for HyDE is different from the one I use to get the final answer.

hyde_prompt_template =""" get the guidelines to this requirement {input}. Use the {guideline_name} which are in the context and think how these guidelines will be helpful to this requirement. print only the final output. """

main_prompt = """You are a professional senior software architect and you need to find out relevant guidelines for the detailed requirement Generate guidelines for the below detailed requirement in {input} Use the guideline information from {context} and give the relevant guidelines for the {input} from {context} only Provide the guidelines only from context and don't use your own knowledge GIVE THE FINAL OUTPUT IN THE BELOW FORMAT Guideline Name:

Guidelines to follow: The guideline name is coming from the {guideline name} """

I tried passing two values (`input` and `guideline_name`) in the code below:

Code:
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings, OpenAI, ChatOpenAI
from langchain.chains.llm import LLMChain
from langchain_community.vectorstores.faiss import FAISS

# Prompt used by HyDE to generate the hypothetical document that gets embedded.
# It declares TWO placeholders: {input} and {guideline_name}.
hyde_prompt_template = """
get the guidelines to this requirement {input}. 
Use the {guideline_name} which are in the context and 
think how these guidelines will be helpful to this requirement. print 
only the final output.
"""
prompt = PromptTemplate.from_template(hyde_prompt_template )

# NOTE: `llm` is not defined in this snippet; presumably it is created earlier.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Per the traceback below (hyde/base.py lines 56-57), embed_query() reads
# `llm_chain.input_keys[0]` and calls `generate([{var_name: text}])`, i.e. it
# fills exactly ONE prompt variable with the query. With a two-placeholder
# prompt the other variable is missing, which matches the reported
# `KeyError: 'input'` raised in LLMChain.prep_prompts.
# NOTE: `base_embeddings` is not defined in this snippet either.
hyde_embeddings = HypotheticalDocumentEmbedder(
    llm_chain=llm_chain, base_embeddings=base_embeddings
)

# Load the persisted FAISS index and use the HyDE embedder for query embedding.
faiss = FAISS.load_local(folder_path="local path",
                        embeddings=hyde_embeddings,
                        allow_dangerous_deserialization=True
                    )

retriever = faiss.as_retriever()

# The traceback shows the retriever forwarding its argument unchanged as
# `query` down to embed_query(text); the dict is NOT unpacked into prompt
# variables, so passing {"input": ..., "guideline_name": ...} cannot supply
# both placeholders.
# NOTE(review): a possible fix -- TODO verify against the installed LangChain
# version -- is to pre-bind the extra variable, e.g.
# `prompt = prompt.partial(guideline_name="GDPR")`, and then call
# `retriever.invoke(requirement)` with a plain string.
retriever.invoke({"input": requirement, "guideline_name": "GDPR"})

But I am getting the error below:

Code:
KeyError Traceback (most recent call last)
Cell In[59], line 1
----> 1 retriever.invoke({"input": requirement})

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\retrievers.py:194, in BaseRetriever.invoke(self, input, config, **kwargs)
175 """Invoke the retriever to get relevant documents.
176
177 Main entry point for synchronous retriever invocations.
(...)
191 retriever.invoke("query")
192 """
193 config = ensure_config(config)
--> 194 return self.get_relevant_documents(
195 input,
196 callbacks=config.get("callbacks"),
197 tags=config.get("tags"),
198 metadata=config.get("metadata"),
199 run_name=config.get("run_name"),
200 **kwargs,
201 )

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core_api\deprecation.py:148, in deprecated..deprecate..warning_emitting_wrapper(*args, **kwargs)
146 warned = True
147 emit_warning()
--> 148 return wrapped(*args, **kwargs)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\retrievers.py:323, in BaseRetriever.get_relevant_documents(self, query, callbacks, tags, metadata, run_name, **kwargs)
321 except Exception as e:
322 run_manager.on_retriever_error(e)
--> 323 raise e
324 else:
325 run_manager.on_retriever_end(
326 result,
327 )

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\retrievers.py:316, in BaseRetriever.get_relevant_documents(self, query, callbacks, tags, metadata, run_name, **kwargs)
314 _kwargs = kwargs if self._expects_other_args else {}
315 if self._new_arg_supported:
--> 316 result = self._get_relevant_documents(
317 query, run_manager=run_manager, **_kwargs
318 )
319 else:
320 result = self._get_relevant_documents(query, **_kwargs)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\vectorstores.py:696, in VectorStoreRetriever._get_relevant_documents(self, query, run_manager)
692 def _get_relevant_documents(
693 self, query: str, *, run_manager: CallbackManagerForRetrieverRun
694 ) -> List[Document]:
695 if self.search_type == "similarity":
--> 696 docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
697 elif self.search_type == "similarity_score_threshold":
698 docs_and_similarities = (
699 self.vectorstore.similarity_search_with_relevance_scores(
700 query, **self.search_kwargs
701 )
702 )

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_community\vectorstores\faiss.py:530, in FAISS.similarity_search(self, query, k, filter, fetch_k, **kwargs)
510 def similarity_search(
511 self,
512 query: str,
(...)
516 **kwargs: Any,
517 ) -> List[Document]:
518 """Return docs most similar to query.
519
520 Args:
(...)
528 List of Documents most similar to the query.
529 """
--> 530 docs_and_scores = self.similarity_search_with_score(
531 query, k, filter=filter, fetch_k=fetch_k, **kwargs
532 )
533 return [doc for doc, _ in docs_and_scores]

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_community\vectorstores\faiss.py:402, in FAISS.similarity_search_with_score(self, query, k, filter, fetch_k, **kwargs)
378 def similarity_search_with_score(
379 self,
380 query: str,
(...)
384 **kwargs: Any,
385 ) -> List[Tuple[Document, float]]:
386 """Return docs most similar to query.
387
388 Args:
(...)
400 L2 distance in float. Lower score represents more similarity.
401 """
--> 402 embedding = self._embed_query(query)
403 docs = self.similarity_search_with_score_by_vector(
404 embedding,
405 k,
(...)
408 **kwargs,
409 )
410 return docs

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_community\vectorstores\faiss.py:154, in FAISS._embed_query(self, text)
152 def _embed_query(self, text: str) -> List[float]:
153 if isinstance(self.embedding_function, Embeddings):
--> 154 return self.embedding_function.embed_query(text)
155 else:
156 return self.embedding_function(text)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain\chains\hyde\base.py:57, in HypotheticalDocumentEmbedder.embed_query(self, text)
55 """Generate a hypothetical document and embedded it."""
56 var_name = self.llm_chain.input_keys[0]
---> 57 result = self.llm_chain.generate([{var_name: text}])
58 documents = [generation.text for generation in result.generations[0]]
59 embeddings = self.embed_documents(documents)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain\chains\llm.py:135, in LLMChain.generate(self, input_list, run_manager)
129 def generate(
130 self,
131 input_list: List[Dict[str, Any]],
132 run_manager: Optional[CallbackManagerForChainRun] = None,
133 ) -> LLMResult:
134 """Generate LLM result from inputs."""
--> 135 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager)
136 callbacks = run_manager.get_child() if run_manager else None
137 if isinstance(self.llm, BaseLanguageModel):

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain\chains\llm.py:196, in LLMChain.prep_prompts(self, input_list, run_manager)
194 prompts = []
195 for inputs in input_list:
--> 196 selected_inputs = {k: inputs[k] for k in self.prompt.input_variables}
197 prompt = self.prompt.format_prompt(**selected_inputs)
198 _colored_text = get_colored_text(prompt.to_string(), "green")

KeyError: 'input'
<p>I am trying to create HyDE-based embeddings for a requirement and use them to retrieve the relevant documents. Those relevant documents will then be passed, together with the original question, as context to the main prompt. The prompt used for HyDE is different from the one I use to get the final answer.</p>
<p><code>hyde_prompt_template =""" get the guidelines to this requirement {input}. Use the {guideline_name} which are in the context and think how these guidelines will be helpful to this requirement. print only the final output. """</code></p>
<pre><code>main_prompt = """You are a professional senior software architect and you need to find out relevant
guidelines for the detailed requirement
Generate guidelines for the below detailed requirement in {input}
Use the guideline information from {context} and give the relevant guidelines for the {input} from {context} only
Provide the guidelines only from context and don't use your own knowledge
GIVE THE FINAL OUTPUT IN THE BELOW FORMAT
Guideline Name:

Guidelines to follow:
The guideline name is coming from the {guideline name}
"""
</code></pre>
<p>I tried passing two values (<code>input</code> and <code>guideline_name</code>) in the code below:</p>
<pre><code>from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings, OpenAI, ChatOpenAI
from langchain.chains.llm import LLMChain
from langchain_community.vectorstores.faiss import FAISS

# Prompt used by HyDE to generate the hypothetical document that gets embedded.
# It declares TWO placeholders: {input} and {guideline_name}.
hyde_prompt_template = """
get the guidelines to this requirement {input}.
Use the {guideline_name} which are in the context and
think how these guidelines will be helpful to this requirement. print
only the final output.
"""
prompt = PromptTemplate.from_template(hyde_prompt_template )

# NOTE: `llm` is not defined in this snippet; presumably it is created earlier.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Per the traceback below (hyde/base.py lines 56-57), embed_query() reads
# `llm_chain.input_keys[0]` and calls `generate([{var_name: text}])`, i.e. it
# fills exactly ONE prompt variable with the query. With a two-placeholder
# prompt the other variable is missing, which matches the reported
# `KeyError: 'input'` raised in LLMChain.prep_prompts.
# NOTE: `base_embeddings` is not defined in this snippet either.
hyde_embeddings = HypotheticalDocumentEmbedder(
llm_chain=llm_chain, base_embeddings=base_embeddings
)

# Load the persisted FAISS index and use the HyDE embedder for query embedding.
faiss = FAISS.load_local(folder_path="local path",
embeddings=hyde_embeddings,
allow_dangerous_deserialization=True
)

retriever = faiss.as_retriever()

# The traceback shows the retriever forwarding its argument unchanged as
# `query` down to embed_query(text); the dict is NOT unpacked into prompt
# variables, so passing {"input": ..., "guideline_name": ...} cannot supply
# both placeholders.
# NOTE(review): a possible fix -- TODO verify against the installed LangChain
# version -- is to pre-bind the extra variable, e.g.
# `prompt = prompt.partial(guideline_name="GDPR")`, and then call
# `retriever.invoke(requirement)` with a plain string.
retriever.invoke({"input": requirement, "guideline_name": "GDPR"})
</code></pre>
<p><strong>But I am getting the error below:</strong></p>
<pre><code>KeyError Traceback (most recent call last)
Cell In[59], line 1
----> 1 retriever.invoke({"input": requirement})

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\retrievers.py:194, in BaseRetriever.invoke(self, input, config, **kwargs)
175 """Invoke the retriever to get relevant documents.
176
177 Main entry point for synchronous retriever invocations.
(...)
191 retriever.invoke("query")
192 """
193 config = ensure_config(config)
--> 194 return self.get_relevant_documents(
195 input,
196 callbacks=config.get("callbacks"),
197 tags=config.get("tags"),
198 metadata=config.get("metadata"),
199 run_name=config.get("run_name"),
200 **kwargs,
201 )

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core_api\deprecation.py:148, in deprecated..deprecate..warning_emitting_wrapper(*args, **kwargs)
146 warned = True
147 emit_warning()
--> 148 return wrapped(*args, **kwargs)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\retrievers.py:323, in BaseRetriever.get_relevant_documents(self, query, callbacks, tags, metadata, run_name, **kwargs)
321 except Exception as e:
322 run_manager.on_retriever_error(e)
--> 323 raise e
324 else:
325 run_manager.on_retriever_end(
326 result,
327 )

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\retrievers.py:316, in BaseRetriever.get_relevant_documents(self, query, callbacks, tags, metadata, run_name, **kwargs)
314 _kwargs = kwargs if self._expects_other_args else {}
315 if self._new_arg_supported:
--> 316 result = self._get_relevant_documents(
317 query, run_manager=run_manager, **_kwargs
318 )
319 else:
320 result = self._get_relevant_documents(query, **_kwargs)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_core\vectorstores.py:696, in VectorStoreRetriever._get_relevant_documents(self, query, run_manager)
692 def _get_relevant_documents(
693 self, query: str, *, run_manager: CallbackManagerForRetrieverRun
694 ) -> List[Document]:
695 if self.search_type == "similarity":
--> 696 docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
697 elif self.search_type == "similarity_score_threshold":
698 docs_and_similarities = (
699 self.vectorstore.similarity_search_with_relevance_scores(
700 query, **self.search_kwargs
701 )
702 )

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_community\vectorstores\faiss.py:530, in FAISS.similarity_search(self, query, k, filter, fetch_k, **kwargs)
510 def similarity_search(
511 self,
512 query: str,
(...)
516 **kwargs: Any,
517 ) -> List[Document]:
518 """Return docs most similar to query.
519
520 Args:
(...)
528 List of Documents most similar to the query.
529 """
--> 530 docs_and_scores = self.similarity_search_with_score(
531 query, k, filter=filter, fetch_k=fetch_k, **kwargs
532 )
533 return [doc for doc, _ in docs_and_scores]

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_community\vectorstores\faiss.py:402, in FAISS.similarity_search_with_score(self, query, k, filter, fetch_k, **kwargs)
378 def similarity_search_with_score(
379 self,
380 query: str,
(...)
384 **kwargs: Any,
385 ) -> List[Tuple[Document, float]]:
386 """Return docs most similar to query.
387
388 Args:
(...)
400 L2 distance in float. Lower score represents more similarity.
401 """
--> 402 embedding = self._embed_query(query)
403 docs = self.similarity_search_with_score_by_vector(
404 embedding,
405 k,
(...)
408 **kwargs,
409 )
410 return docs

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain_community\vectorstores\faiss.py:154, in FAISS._embed_query(self, text)
152 def _embed_query(self, text: str) -> List[float]:
153 if isinstance(self.embedding_function, Embeddings):
--> 154 return self.embedding_function.embed_query(text)
155 else:
156 return self.embedding_function(text)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain\chains\hyde\base.py:57, in HypotheticalDocumentEmbedder.embed_query(self, text)
55 """Generate a hypothetical document and embedded it."""
56 var_name = self.llm_chain.input_keys[0]
---> 57 result = self.llm_chain.generate([{var_name: text}])
58 documents = [generation.text for generation in result.generations[0]]
59 embeddings = self.embed_documents(documents)

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain\chains\llm.py:135, in LLMChain.generate(self, input_list, run_manager)
129 def generate(
130 self,
131 input_list: List[Dict[str, Any]],
132 run_manager: Optional[CallbackManagerForChainRun] = None,
133 ) -> LLMResult:
134 """Generate LLM result from inputs."""
--> 135 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager)
136 callbacks = run_manager.get_child() if run_manager else None
137 if isinstance(self.llm, BaseLanguageModel):

File c:\GEN_AI\compliance-and-guidelines\venv\Lib\site-packages\langchain\chains\llm.py:196, in LLMChain.prep_prompts(self, input_list, run_manager)
194 prompts = []
195 for inputs in input_list:
--> 196 selected_inputs = {k: inputs[k] for k in self.prompt.input_variables}
197 prompt = self.prompt.format_prompt(**selected_inputs)
198 _colored_text = get_colored_text(prompt.to_string(), "green")

KeyError: 'input'
</code></pre>
 
Top