Vector database search
How I built a search function using a vector database and Azure Cognitive Search.
First, I created a search service in Azure Cognitive Search.
C# Create and upload embeddings
I then used this C# code to create embeddings from the Markdown files in my docs and upload them to the vector database.
1using System;2using System.Collections.Generic;3using System.IO;4using System.Threading.Tasks;5using Microsoft.SemanticKernel;6using Microsoft.SemanticKernel.Connectors.AI.OpenAI;7using Microsoft.SemanticKernel.Connectors.Memory.AzureCognitiveSearch;8using Microsoft.SemanticKernel.Memory;9using Microsoft.SemanticKernel.Plugins.Memory;10using RepoUtils;11
/// <summary>
/// Indexes a directory of markdown files into an Azure Cognitive Search memory
/// store using OpenAI embeddings, then runs a few sample semantic queries
/// against the resulting collection.
/// </summary>
public static class Example14_SemanticMemory
{
    // Name of the memory collection that every save and search in this class targets.
    private const string MemoryCollectionName = "SKJmnDocs";

    /// <summary>
    /// Builds a semantic memory backed by Azure Cognitive Search and runs the example against it.
    /// </summary>
    public static async Task RunAsync()
    {
        Console.WriteLine("==============================================================");
        Console.WriteLine("======== Semantic Memory using Azure Cognitive Search ========");
        Console.WriteLine("==============================================================");

        var memoryWithACS = new MemoryBuilder()
            .WithLoggerFactory(ConsoleLogger.LoggerFactory)
            .WithOpenAITextEmbeddingGenerationService("text-embedding-ada-002", TestConfiguration.OpenAI.ApiKey)
            .WithMemoryStore(new AzureCognitiveSearchMemoryStore(TestConfiguration.ACS.Endpoint, TestConfiguration.ACS.ApiKey))
            .Build();

        await RunExampleAsync(memoryWithACS);
    }

    /// <summary>
    /// Stores the markdown documents in <paramref name="memory"/> and then issues the sample queries.
    /// </summary>
    /// <param name="memory">The semantic memory to write to and search.</param>
    public static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);

        await SearchMemoryAsync(memory, "Where can I learn about zig?");

        await SearchMemoryAsync(memory, "I want to learn about Neovim");

        await SearchMemoryAsync(memory, "What do you think about user specific configuration?");

        await SearchMemoryAsync(memory, "How can I modify text to change code");
    }

    /// <summary>
    /// Searches the collection for <paramref name="query"/> and prints up to two matches
    /// with a relevance score of at least 0.5.
    /// </summary>
    /// <param name="memory">The semantic memory to search.</param>
    /// <param name="query">The natural-language query text.</param>
    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine("\nQuery: " + query + "\n");

        var memoryResults = memory.SearchAsync(MemoryCollectionName, query, limit: 2, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult memoryResult in memoryResults)
        {
            Console.WriteLine($"Result {++i}:");
            // The record id is the document URL (see StoreMemoryAsync).
            Console.WriteLine(" URL: " + memoryResult.Metadata.Id);
            // Metadata.Description is not printed: StoreMemoryAsync stores the whole
            // markdown text there, which would flood the console.
            Console.WriteLine(" Relevance: " + memoryResult.Relevance);
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    /// <summary>
    /// Saves every markdown document returned by <see cref="MarkdownData"/> into the collection.
    /// </summary>
    /// <param name="memory">The semantic memory to write to.</param>
    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */

        Console.WriteLine("\nAdding some markdown files with their content.");
        var jmnDocs = MarkdownData();
        var i = 0;
        foreach (var entry in jmnDocs)
        {
            // Key is the public URL of the document, Value is its markdown text.
            await memory.SaveInformationAsync(
                collection: MemoryCollectionName,
                description: entry.Value,
                id: entry.Key,
                text: entry.Value,
                additionalMetadata: entry.Key);

            Console.Write($" #{++i} saved.");
        }

        Console.WriteLine("\n----------------------");
    }

    /// <summary>
    /// Reads every <c>*.md</c> file under the docs directory (recursively).
    /// </summary>
    /// <returns>A dictionary mapping each document's public URL to the file's text.</returns>
    private static Dictionary<string, string> MarkdownData()
    {
        const string filePath = "/home/jmn/docs/";
        const string urlBase = "https://docs.jmnorlund.net/";
        Dictionary<string, string> docs = new();

        foreach (string file in Directory.EnumerateFiles(filePath, "*.md", SearchOption.AllDirectories))
        {
            docs[BuildDocUrl(urlBase, filePath, file)] = File.ReadAllText(file);
        }

        return docs;
    }

    /// <summary>
    /// Maps a markdown file on disk to its URL: the path relative to
    /// <paramref name="rootDirectory"/>, without the file extension, appended to
    /// <paramref name="urlBase"/>.
    /// </summary>
    /// <param name="urlBase">Base URL, expected to end with '/'.</param>
    /// <param name="rootDirectory">Directory the documents were enumerated from.</param>
    /// <param name="file">Full path of the markdown file.</param>
    /// <returns>The URL for the document.</returns>
    private static string BuildDocUrl(string urlBase, string rootDirectory, string file)
    {
        // Only the root prefix and the trailing extension are removed; a blanket
        // string.Replace would also mangle ".md" (or the root path) appearing
        // elsewhere in the path.
        string relativePath = Path.GetRelativePath(rootDirectory, file);
        string withoutExtension = Path.ChangeExtension(relativePath, null);

        // URLs always use forward slashes, whatever the OS directory separator is.
        return urlBase + withoutExtension.Replace(Path.DirectorySeparatorChar, '/');
    }
}
TypeScript search client code
I then used the (beta) JavaScript SDK to query the database.
1import { SearchClient, AzureKeyCredential } from "@azure/search-documents";2import OpenAI from 'openai';3
// Shared OpenAI client. No options are passed, so it relies on the SDK's default
// configuration (presumably the OPENAI_API_KEY environment variable — confirm
// against the SDK docs).
const openai = new OpenAI();

/**
 * Returns the value of a required environment variable.
 *
 * @param name Name of the environment variable.
 * @returns The non-empty value of the variable.
 * @throws Error if the variable is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
  return value;
}

/**
 * Runs a pure vector search: embeds `query` with OpenAI and queries the
 * search index's "Embedding" field with the resulting vector.
 *
 * Reads ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX from the environment.
 *
 * @param query Natural-language query text.
 * @returns All search results, each selecting only the "AdditionalMetadata" field.
 * @throws Error if a required environment variable is missing.
 */
async function doPureVectorSearch(query: string) {
  // Fail fast with a clear message instead of handing `undefined` to the SDK.
  const searchServiceEndpoint = requireEnv("ACS_ENDPOINT");
  const searchServiceApiKey = requireEnv("ACS_QUERY_KEY");
  const searchIndexName = requireEnv("ACS_INDEX");

  const searchClient = new SearchClient(
    searchServiceEndpoint,
    searchIndexName,
    new AzureKeyCredential(searchServiceApiKey),
  );

  // Same embedding model as the one used when the documents were indexed.
  const embedding = await openai.embeddings.create({
    model: "text-embedding-ada-002",
    input: query,
  });

  // "*" matches every document on the text side, so only the vector query
  // determines the results.
  const response = await searchClient.search("*", {
    vectorQueries: [
      {
        kind: "vector",
        vector: embedding.data[0].embedding,
        fields: ["Embedding"],
      },
    ],
    select: ["AdditionalMetadata"],
  });

  // Drain the async result iterator into a plain array.
  const results = [];
  for await (const result of response.results) {
    results.push(result);
  }

  return results;
}