Source code for langchain.indexes.vectorstore

from typing import Any, List, Optional, Type

from pydantic import BaseModel, Extra, Field

from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.document_loaders.base import BaseLoader
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.openai import OpenAI
from langchain.schema import Document
from langchain.schema.language_model import BaseLanguageModel
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.chroma import Chroma


def _get_default_text_splitter() -> TextSplitter:
    return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)


[docs]class VectorStoreIndexWrapper(BaseModel): """Wrapper around a vectorstore for easy access.""" vectorstore: VectorStore
[docs] class Config: """Configuration for this pydantic object.""" extra = Extra.forbid arbitrary_types_allowed = True
[docs] def query( self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any ) -> str: """Query the vectorstore.""" llm = llm or OpenAI(temperature=0) chain = RetrievalQA.from_chain_type( llm, retriever=self.vectorstore.as_retriever(), **kwargs ) return chain.run(question)
[docs] def query_with_sources( self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any ) -> dict: """Query the vectorstore and get back sources.""" llm = llm or OpenAI(temperature=0) chain = RetrievalQAWithSourcesChain.from_chain_type( llm, retriever=self.vectorstore.as_retriever(), **kwargs ) return chain({chain.question_key: question})
[docs]class VectorstoreIndexCreator(BaseModel): """Logic for creating indexes.""" vectorstore_cls: Type[VectorStore] = Chroma embedding: Embeddings = Field(default_factory=OpenAIEmbeddings) text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter) vectorstore_kwargs: dict = Field(default_factory=dict)
[docs] class Config: """Configuration for this pydantic object.""" extra = Extra.forbid arbitrary_types_allowed = True
[docs] def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper: """Create a vectorstore index from loaders.""" docs = [] for loader in loaders: docs.extend(loader.load()) return self.from_documents(docs)
[docs] def from_documents(self, documents: List[Document]) -> VectorStoreIndexWrapper: """Create a vectorstore index from documents.""" sub_docs = self.text_splitter.split_documents(documents) vectorstore = self.vectorstore_cls.from_documents( sub_docs, self.embedding, **self.vectorstore_kwargs ) return VectorStoreIndexWrapper(vectorstore=vectorstore)