Skip to content

Vector database search

This is how I built a search function using a vector database and Azure Cognitive Search.

First, I created a search service in Azure Cognitive Search.

C# Create and upload embeddings

I then used the following C# code to create embeddings from the Markdown files of my docs and upload them to the vector database.

1
using System;
2
using System.Collections.Generic;
3
using System.IO;
4
using System.Threading.Tasks;
5
using Microsoft.SemanticKernel;
6
using Microsoft.SemanticKernel.Connectors.AI.OpenAI;
7
using Microsoft.SemanticKernel.Connectors.Memory.AzureCognitiveSearch;
8
using Microsoft.SemanticKernel.Memory;
9
using Microsoft.SemanticKernel.Plugins.Memory;
10
using RepoUtils;
11

12
/// <summary>
/// Saves local Markdown files into a Semantic Kernel semantic memory backed by
/// Azure Cognitive Search, then runs a few sample similarity queries against it.
/// </summary>
public static class Example14_SemanticMemory
{
    /// <summary>Name of the memory collection the documents are saved to and searched in.</summary>
    private const string MemoryCollectionName = "SKJmnDocs";

    /// <summary>
    /// Builds a semantic memory that generates embeddings with the OpenAI
    /// "text-embedding-ada-002" model and stores them in Azure Cognitive Search,
    /// then runs the example against it.
    /// </summary>
    public static async Task RunAsync()
    {
        Console.WriteLine("==============================================================");
        Console.WriteLine("======== Semantic Memory using Azure Cognitive Search ========");
        Console.WriteLine("==============================================================");

        var memoryWithACS = new MemoryBuilder()
            .WithLoggerFactory(ConsoleLogger.LoggerFactory)
            .WithOpenAITextEmbeddingGenerationService("text-embedding-ada-002", TestConfiguration.OpenAI.ApiKey)
            .WithMemoryStore(new AzureCognitiveSearchMemoryStore(TestConfiguration.ACS.Endpoint, TestConfiguration.ACS.ApiKey))
            .Build();

        await RunExampleAsync(memoryWithACS);
    }

    /// <summary>
    /// Stores the Markdown documents in <paramref name="memory"/> and then runs the sample queries.
    /// </summary>
    /// <param name="memory">The semantic memory to write to and search.</param>
    public static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);

        string[] sampleQueries =
        {
            "Where can I learn about zig?",
            "I want to learn about Neovim",
            "What do you think about user specific configuration?",
            "How can I modify text to change code",
        };

        // Queries run one at a time so the console output stays in order.
        foreach (string sampleQuery in sampleQueries)
        {
            await SearchMemoryAsync(memory, sampleQuery);
        }
    }

    /// <summary>
    /// Searches the collection for <paramref name="query"/> and prints the id and relevance
    /// of up to two results whose relevance score is at least 0.5.
    /// </summary>
    /// <param name="memory">The semantic memory to search.</param>
    /// <param name="query">Natural-language query text.</param>
    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine($"\nQuery: {query}\n");

        var memoryResults = memory.SearchAsync(MemoryCollectionName, query, limit: 2, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult memoryResult in memoryResults)
        {
            Console.WriteLine($"Result {++i}:");
            // The record id is the document URL (StoreMemoryAsync saves each entry with id = URL).
            Console.WriteLine($"  URL:     : {memoryResult.Metadata.Id}");
            // Not printed: StoreMemoryAsync puts the whole file text into Description.
            //Console.WriteLine("  Title    : " + memoryResult.Metadata.Description);
            Console.WriteLine($"  Relevance: {memoryResult.Relevance}");
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    /// <summary>
    /// Saves every Markdown document returned by <see cref="MarkdownData"/> into the memory collection.
    /// </summary>
    /// <param name="memory">The semantic memory to write to.</param>
    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */

        Console.WriteLine("\nAdding some markdown files with their content.");
        var jmnDocs = MarkdownData();
        var i = 0;
        foreach (var entry in jmnDocs)
        {
            // Key = document URL, Value = file content.
            // The URL is stored both as the record id and as additional metadata.
            // NOTE(review): description receives the full file text, same as text — confirm this is intended.
            await memory.SaveInformationAsync(
                collection: MemoryCollectionName,
                description: entry.Value,
                id: entry.Key,
                text: entry.Value,
                additionalMetadata: entry.Key);

            Console.Write($" #{++i} saved.");
        }

        Console.WriteLine("\n----------------------");
    }

    /// <summary>
    /// Reads every <c>*.md</c> file below the docs directory (recursively).
    /// </summary>
    /// <returns>
    /// A dictionary mapping each document's public URL (base URL + path relative to the docs
    /// directory, without the file extension) to the file's text content.
    /// </returns>
    private static Dictionary<string, string> MarkdownData()
    {
        const string filePath = "/home/jmn/docs/";
        const string urlBase = "https://docs.jmnorlund.net/";
        Dictionary<string, string> docs = new();

        foreach (string file in Directory.EnumerateFiles(filePath, "*.md", SearchOption.AllDirectories))
        {
            // Remove only the root prefix and only the trailing extension. A blanket
            // Replace(".md", "") would also mangle names that contain ".md" elsewhere
            // (e.g. a folder "notes.mdx" or a file "a.md.b.md").
            string relativePath = Path.GetRelativePath(filePath, file);
            string urlPath = Path.ChangeExtension(relativePath, null)
                .Replace(Path.DirectorySeparatorChar, '/'); // URLs always use forward slashes

            docs[urlBase + urlPath] = File.ReadAllText(file);
        }

        return docs;
    }
}

TypeScript search client code

I then used the (beta) JavaScript SDK (`@azure/search-documents`) to query the database.

1
import { SearchClient, AzureKeyCredential } from "@azure/search-documents";
2
import OpenAI from 'openai';
3

4
// Shared OpenAI client, used below to embed the query text.
// NOTE(review): constructed without options — presumably the SDK reads its API key from the environment; confirm.
const openai = new OpenAI();
5

6
/**
 * Embeds `query` with the OpenAI "text-embedding-ada-002" model and runs a vector
 * query against the "Embedding" field of the Azure Cognitive Search index,
 * selecting only the "AdditionalMetadata" field of each hit.
 *
 * Connection settings are read from the ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX
 * environment variables.
 *
 * @param query Natural-language search text.
 * @returns All results yielded by the search, collected into an array.
 * @throws Error if any of the three environment variables is missing or empty.
 */
async function doPureVectorSearch(query: string) {
  const searchServiceEndpoint = process.env.ACS_ENDPOINT;
  const searchServiceApiKey = process.env.ACS_QUERY_KEY;
  const searchIndexName = process.env.ACS_INDEX;

  // process.env values are `string | undefined`; fail early with a clear message
  // instead of handing `undefined` to the SDK constructors.
  if (!searchServiceEndpoint || !searchServiceApiKey || !searchIndexName) {
    throw new Error(
      "ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX environment variables must be set"
    );
  }

  const searchClient = new SearchClient(
    searchServiceEndpoint,
    searchIndexName,
    new AzureKeyCredential(searchServiceApiKey)
  );

  const embedding = await openai.embeddings.create({
    model: "text-embedding-ada-002",
    input: query,
  });

  const response = await searchClient.search("*", {
    vectorQueries: [
      {
        kind: "vector",
        vector: embedding.data[0].embedding,
        fields: ["Embedding"],
      },
    ],
    select: ["AdditionalMetadata"],
  });

  // response.results is an async iterable; drain it into a plain array for the caller.
  const results = [];
  for await (const result of response.results) {
    results.push(result);
  }

  return results;
}

Links

https://learn.microsoft.com/en-us/azure/search/vector-search-overview