Embedding#

embedding.cohere#

class gptcache.embedding.cohere.Cohere(model: str = 'large', api_key: Optional[str] = None)[source]#

Generate text embedding for given text using Cohere.

Parameters
  • model (str) – model name (size), defaults to ‘large’.

  • api_key (str) – Cohere API Key.

Example

from gptcache.embedding import Cohere

test_sentence = 'Hello, world.'
encoder = Cohere(model='small', api_key='your_cohere_key')
embed = encoder.to_embeddings(test_sentence)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding given text input

Parameters

data (str) – text in string.

Returns

a text embedding in shape of (dim,).

embedding.data2vec#

class gptcache.embedding.data2vec.Data2VecAudio(model_name='facebook/data2vec-audio-base-960h')[source]#

Generate audio embedding for given audio using pretrained models from Data2Vec.

Parameters

model_name (str) – model name, defaults to ‘facebook/data2vec-audio-base-960h’.

Example

from gptcache.embedding import Data2VecAudio

audio_file = 'test.wav'
encoder = Data2VecAudio(model_name='facebook/data2vec-audio-base-960h')
embed = encoder.to_embeddings(audio_file)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding for the given audio file.

Parameters

data (str) – path to audio file.

Returns

an audio embedding in shape of (dim,).

embedding.fasttext#

class gptcache.embedding.fasttext.FastText(model: str = 'en', dim: Optional[int] = None)[source]#

Generate sentence embedding for given text using pretrained models of different languages from fastText.

Parameters
  • model (str) – model name, defaults to ‘en’.

  • dim (int) – reduced dimension of embedding. If this parameter is not provided, the embedding dimension (300) will not change.

Example

from gptcache.embedding import FastText

test_sentence = 'Hello, world.'
encoder = FastText(model='en', dim=100)
embed = encoder.to_embeddings(test_sentence)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding given text input

Parameters

data (str) – text in string.

Returns

a text embedding in shape of (dim,).

embedding.huggingface#

class gptcache.embedding.huggingface.Huggingface(model: str = 'sentence-transformers/all-MiniLM-L6-v2')[source]#

Generate sentence embedding for given text using pretrained models from Huggingface transformers.

Parameters

model (str) – model name, defaults to ‘sentence-transformers/all-MiniLM-L6-v2’.

Example

from gptcache.embedding import Huggingface

test_sentence = 'Hello, world.'
encoder = Huggingface(model='sentence-transformers/all-MiniLM-L6-v2')
embed = encoder.to_embeddings(test_sentence)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding given text input

Parameters

data (str) – text in string.

Returns

a text embedding in shape of (dim,).

embedding.onnx#

class gptcache.embedding.onnx.Onnx(model='GPTCache/paraphrase-albert-onnx')[source]#

Generate text embedding for given text using ONNX Model.

Parameters

model (str) – model name, defaults to ‘GPTCache/paraphrase-albert-onnx’.

Example

from gptcache.embedding import Onnx

test_sentence = 'Hello, world.'
encoder = Onnx(model='GPTCache/paraphrase-albert-onnx')
embed = encoder.to_embeddings(test_sentence)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding given text input.

Parameters

data (str) – text in string.

Returns

a text embedding in shape of (dim,).

embedding.openai#

class gptcache.embedding.openai.OpenAI(model: str = 'text-embedding-ada-002', api_key: Optional[str] = None)[source]#

Generate text embedding for given text using OpenAI.

Parameters
  • model (str) – model name, defaults to ‘text-embedding-ada-002’.

  • api_key (str) – OpenAI API Key. When the parameter is not specified, it will load the key by default if it is available.

Example

from gptcache.embedding import OpenAI

test_sentence = 'Hello, world.'
encoder = OpenAI(api_key='your_openai_key')
embed = encoder.to_embeddings(test_sentence)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding given text input

Parameters

data (str) – text in string.

Returns

a text embedding in shape of (dim,).

embedding.sbert#

class gptcache.embedding.sbert.SBERT(model: str = 'all-MiniLM-L6-v2')[source]#

Generate sentence embedding for given text using pretrained models of Sentence Transformers.

Parameters

model (str) – model name, defaults to ‘all-MiniLM-L6-v2’.

Example

from gptcache.embedding import SBERT

test_sentence = 'Hello, world.'
encoder = SBERT('paraphrase-albert-small-v2')
embed = encoder.to_embeddings(test_sentence)
property dimension#

Embedding dimension.

Returns

embedding dimension

to_embeddings(data, **_)[source]#

Generate embedding given text input

Parameters

data (str) – text in string.

Returns

a text embedding in shape of (dim,).

embedding.string#