Manager #

Index

Manager

manager.data_manager #

class gptcache.manager.data_manager.DataManager[source]#: DataManager manages the cache data, including saving and searching.

class gptcache.manager.data_manager.MapDataManager(data_path, max_size, get_data_container=None)[source]#

MapDataManager stores all data in a map data structure.

Parameters

data_path (str) – the path to save the map data, defaults to ‘data_map.txt’.
max_size (int) – the max size for the cache, defaults to 1000.
get_data_container (Callable) – a Callable to get the data container, defaults to None.

Example

from gptcache.manager import get_data_manager

data_manager = get_data_manager("data_map.txt", 1000)

class gptcache.manager.data_manager.SSDataManager(s: gptcache.manager.scalar_data.base.CacheStorage, v: gptcache.manager.vector_data.base.VectorBase, o: Optional[gptcache.manager.object_data.base.ObjectBase], max_size, clean_size, policy='LRU')[source]#

Generate an SSDataManager to manage the data.

Parameters

s (CacheStorage) – CacheStorage to manage the scalar data, it can be generated with gptcache.manager.CacheBase().
v (VectorBase) – VectorBase to manage the vector data, it can be generated with gptcache.manager.VectorBase().
max_size (int) – the max size for the cache, defaults to 1000.
clean_size (int) – the size to clean up, defaults to max_size * 0.2.
policy (str) – the eviction policy; “LRU” and “FIFO” are currently supported, defaults to “LRU”.

save(question, answer, embedding_data, **kwargs)[source]#

Save the data and vectors to cache and vector storage.

Parameters

question (str) – question data.
answer (str, Answer or (Any, DataType)) – answer data.
embedding_data (np.ndarray) – vector data.

Example

import numpy as np
from gptcache.manager import get_data_manager, CacheBase, VectorBase

data_manager = get_data_manager(CacheBase('sqlite'), VectorBase('faiss', dimension=128))
data_manager.save('hello', 'hi', np.random.random((128, )).astype('float32'))

manager.eviction #

gptcache.manager.eviction.EvictionBase(name: str, **kwargs)[source]#

Generate a specific eviction base with the configuration.

Parameters

name (str) – the name of the eviction, like: memory
policy (str) – eviction strategy
maxsize (int) – the maxsize of cache data
clean_size (int) – the amount of data to clean up when the size of the cache data reaches the max size
on_evict (Callable[[List[Any]], None]) – the function for cleaning the data in the store

Example

from gptcache.manager import EvictionBase

cache_base = EvictionBase('memory', policy='lru', maxsize=10, clean_size=2, on_evict=lambda x: print(x))

manager.eviction_manager #

class gptcache.manager.eviction_manager.EvictionManager(scalar_storage, vector_base)[source]#

EvictionManager to manage the eviction policy.

Parameters

scalar_storage (CacheStorage) – CacheStorage to manage the scalar data.
vector_base (VectorBase) – VectorBase to manage the vector data.

manager.factory #

gptcache.manager.factory.get_data_manager(cache_base: Optional[Union[gptcache.manager.scalar_data.CacheBase, str]] = None, vector_base: Optional[Union[gptcache.manager.vector_data.VectorBase, str]] = None, object_base: Optional[Union[gptcache.manager.object_data.ObjectBase, str]] = None, max_size: int = 1000, clean_size: Optional[int] = None, eviction: str = 'LRU', data_path: str = 'data_map.txt', get_data_container: Optional[Callable] = None)[source]#

Generate an SSDataManager (with cache_base, vector_base, max_size, clean_size and eviction params) or a MapDataManager (with data_path, max_size and get_data_container params) to manage the data.

Parameters

cache_base (CacheBase or str) – a CacheBase object, or the name of the cache storage; ‘sqlite’, ‘postgresql’, ‘mysql’, ‘mariadb’, ‘sqlserver’ and ‘oracle’ are currently supported.
vector_base (VectorBase or str) – a VectorBase object, or the name of the vector storage; ‘milvus’, ‘faiss’ and ‘chromadb’ are currently supported.
object_base (ObjectBase or str) – an object storage, supports local path and s3.
max_size (int) – the max size for the cache, defaults to 1000.
clean_size (int) – the size to clean up, defaults to max_size * 0.2.
eviction (str) – the eviction policy; “LRU” and “FIFO” are currently supported, defaults to “LRU”.
data_path (str) – the path to save the map data, defaults to ‘data_map.txt’.
get_data_container (Callable) – a Callable to get the data container, defaults to None.

Returns

SSDataManager or MapDataManager.

Example

from gptcache.manager import get_data_manager, CacheBase, VectorBase

data_manager = get_data_manager(CacheBase('sqlite'), VectorBase('faiss', dimension=128))

manager.object_data #

gptcache.manager.object_data.ObjectBase(name: str, **kwargs)[source]#

Generate a specific ObjectStorage with the configuration. For example, the settings for ObjectBase (with the name param) to manage LocalObjectStorage or S3 object storage.

Parameters

name (str) – the name of the object storage; ‘local’ and ‘s3’ are currently supported.
path (str) – the cache root of the LocalObjectStorage.
bucket (str) – the bucket of s3.
path_prefix (str) – s3 object prefix.
access_key (str) – the access_key of s3.
secret_key (str) – the secret_key of s3.

Returns

ObjectStorage.

Example

from gptcache.manager import ObjectBase

obj_storage = ObjectBase('local', path='./')

manager.scalar_data #

gptcache.manager.scalar_data.CacheBase(name: str, **kwargs)[source]#

Generate a specific CacheStorage with the configuration. For example, the settings for SQLDataBase (with name, sql_url and table_name params) to manage SQLite, PostgreSQL, MySQL, MariaDB, SQL Server and Oracle.

Parameters

name (str) – the name of the cache storage; ‘sqlite’, ‘postgresql’, ‘mysql’, ‘mariadb’, ‘sqlserver’ and ‘oracle’ are currently supported.
sql_url (str) – the url of the sql database for cache, such as ‘<db_type>+<db_driver>://<username>:<password>@<host>:<port>/<database>’, and the default value is related to the cache_store parameter, ‘sqlite:///./sqlite.db’ for ‘sqlite’, ‘postgresql+psycopg2://postgres:123456@127.0.0.1:5432/postgres’ for ‘postgresql’, ‘mysql+pymysql://root:123456@127.0.0.1:3306/mysql’ for ‘mysql’, ‘mariadb+pymysql://root:123456@127.0.0.1:3307/mysql’ for ‘mariadb’, ‘mssql+pyodbc://sa:Strongpsw_123@127.0.0.1:1434/msdb?driver=ODBC+Driver+17+for+SQL+Server’ for ‘sqlserver’, ‘oracle+cx_oracle://oracle:123456@127.0.0.1:1521/?service_name=helowin&encoding=UTF-8&nencoding=UTF-8’ for ‘oracle’.
table_name (str) – the table name for sql database, defaults to ‘gptcache’.

Returns

CacheStorage.

Example

from gptcache.manager import CacheBase

cache_base = CacheBase('sqlite')

manager.vector_data #

gptcache.manager.vector_data.VectorBase(name: str, **kwargs)[source]#

Generate a specific VectorBase with the configuration. For example, the settings for Milvus (with host, port, password, secure, collection_name, index_params, search_params, local_mode, local_data params), Faiss (with index_path, dimension, top_k params), Chromadb (with top_k, client_settings, persist_directory, collection_name params), Hnswlib (with index_file_path, dimension, top_k, max_elements params).

Parameters

name (str) – the name of the vectorbase; ‘milvus’, ‘faiss’, ‘chromadb’ and ‘hnswlib’ are currently supported.
top_k (int) – the number of vector results to return, defaults to 1.
dimension (int) – the dimension of the vector, defaults to 0.
index_path (str) – the path to Faiss index, defaults to ‘faiss.index’.
host (str) – the host for Milvus vector database, defaults to ‘localhost’.
port (str) – the port for Milvus vector database, defaults to ‘19530’.
user (str) – the user for Zilliz Cloud, defaults to “”.
password (str) – the password for Zilliz Cloud, defaults to “”.
secure (bool) – whether to use HTTPS with Zilliz Cloud, defaults to False.
index_params (dict) – the index parameters for Milvus, defaults to the HNSW index: {‘metric_type’: ‘L2’, ‘index_type’: ‘HNSW’, ‘params’: {‘M’: 8, ‘efConstruction’: 64}}.
search_params (dict) – the search parameters for Milvus, defaults to None.
collection_name (str) – the name of the collection for Milvus vector database, defaults to ‘gptcache’.
local_mode (bool) – if true, will start a local milvus server.
local_data (str) – required when local_mode is True.
client_settings (Settings) – the setting for Chromadb.
persist_directory (str) – the directory to persist, defaults to ‘.chromadb/’ in the current directory.
index_path (str) – the path to the hnswlib index, defaults to ‘hnswlib_index.bin’.
max_elements (int) – max_elements of hnswlib, defaults to 100000.

Manager

Contents