@@ -2,6 +2,7 @@ package rag
22
33import (
44 "bytes"
5+ "context"
56 "encoding/json"
67 "fmt"
78 "io"
@@ -15,6 +16,7 @@ import (
1516 "github.com/mudler/localrecall/pkg/xlog"
1617 "github.com/mudler/localrecall/rag/engine"
1718 "github.com/mudler/localrecall/rag/types"
19+ "github.com/sashabaranov/go-openai"
1820)
1921
2022// CollectionState represents the persistent state of a collection
@@ -55,7 +57,7 @@ func loadDB(path string) (*CollectionState, error) {
5557 return state , nil
5658}
5759
58- func NewPersistentCollectionKB (stateFile , assetDir string , store Engine , maxChunkSize int ) (* PersistentKB , error ) {
60+ func NewPersistentCollectionKB (stateFile , assetDir string , store Engine , maxChunkSize int , llmClient * openai. Client , embeddingModel string ) (* PersistentKB , error ) {
5961 // if file exists, try to load an existing state
6062 // if file does not exist, create a new state
6163 if err := os .MkdirAll (assetDir , 0755 ); err != nil {
@@ -89,6 +91,24 @@ func NewPersistentCollectionKB(stateFile, assetDir string, store Engine, maxChun
8991 index : state .Index ,
9092 }
9193
94+ // TODO: Automatically repopulate if the embedding dimensions do not match.
95+ // To detect a mismatch, check whether the number of dimensions of the first embedding in the index is the same as the
96+ // dimension that the embedding model returns.
97+ resp , err := llmClient .CreateEmbeddings (context .Background (),
98+ openai.EmbeddingRequestStrings {
99+ Input : []string {"test" },
100+ Model : openai .EmbeddingModel (embeddingModel ),
101+ },
102+ )
103+ if err == nil && len (resp .Data ) > 0 {
104+ embedding := resp .Data [0 ].Embedding
105+ embeddingDimensions , err := db .Engine .GetEmbeddingDimensions ()
106+ if err == nil && len (embedding ) != embeddingDimensions {
107+ xlog .Info ("Embedding dimensions mismatch, repopulating" , "embeddingDimensions" , embeddingDimensions , "embedding" , embedding )
108+ return db , db .Repopulate ()
109+ }
110+ }
111+
92112 return db , nil
93113}
94114
0 commit comments