Vector database search
How I made a search function using a vector database and Azure Cognitive Search.
First, I created a search service in Azure Cognitive Search.
C# Create and upload embeddings
I then used this C# code to create embeddings from my docs' markdown files and upload them to the vector database.
using System;using System.Collections.Generic;using System.IO;using System.Threading.Tasks;using Microsoft.SemanticKernel;using Microsoft.SemanticKernel.Connectors.AI.OpenAI;using Microsoft.SemanticKernel.Connectors.Memory.AzureCognitiveSearch;using Microsoft.SemanticKernel.Memory;using Microsoft.SemanticKernel.Plugins.Memory;using RepoUtils;
/// <summary>
/// Sample that stores markdown documents in a semantic memory backed by Azure Cognitive Search
/// and then runs a few natural-language queries against it.
/// <c>MemoryCollectionName</c> is the collection name passed to both the save and the search calls.
/// </summary>
public static class Example14_SemanticMemory{ private const string MemoryCollectionName = "SKJmnDocs";
/// <summary>
/// Entry point of the sample: prints a banner, builds a semantic memory that uses
/// OpenAI embeddings and an Azure Cognitive Search store, then runs the example on it.
/// </summary>
public static async Task RunAsync()
{
    string[] banner =
    {
        "==============================================================",
        "======== Semantic Memory using Azure Cognitive Search ========",
        "==============================================================",
    };

    foreach (string bannerLine in banner)
    {
        Console.WriteLine(bannerLine);
    }

    // Embeddings come from OpenAI; vectors are persisted in Azure Cognitive Search.
    // Keys and endpoint are read from TestConfiguration.
    ISemanticTextMemory memoryWithACS = new MemoryBuilder()
        .WithLoggerFactory(ConsoleLogger.LoggerFactory)
        .WithOpenAITextEmbeddingGenerationService("text-embedding-ada-002", TestConfiguration.OpenAI.ApiKey)
        .WithMemoryStore(new AzureCognitiveSearchMemoryStore(TestConfiguration.ACS.Endpoint, TestConfiguration.ACS.ApiKey))
        .Build();

    await RunExampleAsync(memoryWithACS);
}
/// <summary>
/// Populates <paramref name="memory"/> with the markdown documents and then runs a fixed
/// set of sample queries against it, one after another.
/// </summary>
/// <param name="memory">Semantic memory to write to and search.</param>
public static async Task RunExampleAsync(ISemanticTextMemory memory)
{
    await StoreMemoryAsync(memory);

    string[] sampleQueries =
    {
        "Where can I learn about zig?",
        "I want to learn about Neovim",
        "What do you think about user specific configuration?",
        "How can I modify text to change code",
    };

    // Queries are awaited sequentially so their console output is not interleaved.
    foreach (string sampleQuery in sampleQueries)
    {
        await SearchMemoryAsync(memory, sampleQuery);
    }
}
/// <summary>
/// Searches the memory collection for <paramref name="query"/> and prints the id and
/// relevance of each match (at most 2 matches, minimum relevance score 0.5).
/// </summary>
/// <param name="memory">Semantic memory to search.</param>
/// <param name="query">Natural-language query text.</param>
private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
{
    Console.WriteLine($"\nQuery: {query}\n");

    int resultNumber = 0;
    await foreach (MemoryQueryResult hit in memory.SearchAsync(MemoryCollectionName, query, limit: 2, minRelevanceScore: 0.5))
    {
        resultNumber++;
        Console.WriteLine($"Result {resultNumber}:");

        // The record id is the document URL (see StoreMemoryAsync). The description is not
        // printed: StoreMemoryAsync stores the whole document text in that field.
        Console.WriteLine($" URL: : {hit.Metadata.Id}");
        Console.WriteLine($" Relevance: {hit.Relevance}");
        Console.WriteLine();
    }

    Console.WriteLine("----------------------");
}
/// <summary>
/// Saves every markdown document returned by <c>MarkdownData()</c> into the memory
/// collection, printing a running counter as each one is stored.
/// </summary>
/// <param name="memory">Semantic memory that receives the documents.</param>
private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
{
    // Note on indexing:
    //  - with Azure Cognitive Search the data is automatically indexed on write;
    //  - with a VolatileStore plus embedding generation, SK takes care of creating
    //    and storing the index.
    Console.WriteLine("\nAdding some markdown files with their content.");

    int savedCount = 0;
    foreach (KeyValuePair<string, string> doc in MarkdownData())
    {
        string url = doc.Key;
        string markdown = doc.Value;

        // The URL doubles as record id and additional metadata; the document text is
        // stored both as the embedded text and as the description.
        await memory.SaveInformationAsync(
            collection: MemoryCollectionName,
            description: markdown,
            id: url,
            text: markdown,
            additionalMetadata: url);

        savedCount++;
        Console.Write($" #{savedCount} saved.");
    }

    Console.WriteLine("\n----------------------");
}
/// <summary>
/// Reads every <c>*.md</c> file under <paramref name="filePath"/> (recursively) and maps
/// each document's URL to the raw text of the file.
/// </summary>
/// <param name="filePath">Root directory that is scanned for markdown files.</param>
/// <param name="urlBase">
/// URL prefix the extension-less relative path is appended to; it is concatenated as-is,
/// so it should end with a slash.
/// </param>
/// <returns>Dictionary keyed by document URL whose values are the file contents.</returns>
private static Dictionary<string, string> MarkdownData(
    string filePath = "/home/jmn/docs/",
    string urlBase = "https://docs.jmnorlund.net/")
{
    Dictionary<string, string> docs = new();

    foreach (string file in Directory.EnumerateFiles(filePath, "*.md", SearchOption.AllDirectories))
    {
        // Derive the URL path structurally instead of with string.Replace, which removes
        // EVERY occurrence of the root path and of ".md" (e.g. a "notes.md/" directory or
        // an "a.md.md" file would be mangled).
        string relativePath = Path.GetRelativePath(filePath, file);

        // A null extension strips only the final ".md"; separators are normalised so the
        // URL uses forward slashes on every platform.
        string urlPath = Path.ChangeExtension(relativePath, null)
            .Replace(Path.DirectorySeparatorChar, '/');

        docs[urlBase + urlPath] = File.ReadAllText(file);
    }

    return docs;
}
}
TypeScript search client code
I then used the (beta) JavaScript SDK to query the database.
import { SearchClient, AzureKeyCredential } from "@azure/search-documents";import OpenAI from 'openai';
// Shared OpenAI client, used below to create the embedding for the query text.
// NOTE(review): constructed without arguments, so the API key presumably comes from the
// environment (e.g. OPENAI_API_KEY) — confirm against the SDK version in use.
const openai = new OpenAI();
/**
 * Embeds `query` with the "text-embedding-ada-002" model and runs a vector query
 * against the "Embedding" field of the Azure Cognitive Search index, selecting only
 * the "AdditionalMetadata" field of each hit.
 *
 * Connection settings are read from the ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX
 * environment variables.
 *
 * @param query Natural-language text to search for.
 * @returns All search results, collected from the async result stream into an array.
 * @throws Error if any of the three environment variables is missing or empty.
 */
async function doPureVectorSearch(query: string) {
  const searchServiceEndpoint = process.env.ACS_ENDPOINT;
  const searchServiceApiKey = process.env.ACS_QUERY_KEY;
  const searchIndexName = process.env.ACS_INDEX;

  // process.env values are `string | undefined`: fail fast with a clear message
  // instead of handing `undefined` to the client constructors. The check also
  // narrows the three values to `string` for the type checker.
  if (!searchServiceEndpoint || !searchServiceApiKey || !searchIndexName) {
    throw new Error(
      "ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX environment variables must be set"
    );
  }

  const searchClient = new SearchClient(
    searchServiceEndpoint,
    searchIndexName,
    new AzureKeyCredential(searchServiceApiKey)
  );

  const embedding = await openai.embeddings.create({
    model: "text-embedding-ada-002",
    input: query,
  });

  const response = await searchClient.search("*", {
    vectorQueries: [
      {
        kind: "vector",
        vector: embedding.data[0].embedding,
        fields: ["Embedding"],
      },
    ],
    select: ["AdditionalMetadata"],
  });

  // The results are an async iterable; drain it so callers get a plain array.
  const results = [];
  for await (const result of response.results) {
    results.push(result);
  }

  return results;
}