Skip to content

Vector database search

How I built a search function using a vector database and Azure Cognitive Search.

First, I created a search service in Azure Cognitive Search.

C# Create and upload embeddings

I then used this C# code to create embeddings from the Markdown files of my docs and upload them to the vector database.

1
using System;
2
using System.Collections.Generic;
3
using System.IO;
4
using System.Threading.Tasks;
5
using Microsoft.SemanticKernel;
6
using Microsoft.SemanticKernel.Connectors.AI.OpenAI;
7
using Microsoft.SemanticKernel.Connectors.Memory.AzureCognitiveSearch;
8
using Microsoft.SemanticKernel.Memory;
9
using Microsoft.SemanticKernel.Plugins.Memory;
10
using RepoUtils;
11
12
/// <summary>
/// Builds a semantic memory backed by Azure Cognitive Search, stores every Markdown
/// file found under the docs directory in it, and then runs a few sample queries.
/// </summary>
public static class Example14_SemanticMemory
{
    private const string MemoryCollectionName = "SKJmnDocs";

    /// <summary>
    /// Entry point: creates the memory (OpenAI embeddings + Azure Cognitive Search store)
    /// and runs the store-then-search example against it.
    /// </summary>
    public static async Task RunAsync()
    {
        Console.WriteLine("==============================================================");
        Console.WriteLine("======== Semantic Memory using Azure Cognitive Search ========");
        Console.WriteLine("==============================================================");

        var memoryWithACS = new MemoryBuilder()
            .WithLoggerFactory(ConsoleLogger.LoggerFactory)
            .WithOpenAITextEmbeddingGenerationService("text-embedding-ada-002", TestConfiguration.OpenAI.ApiKey)
            .WithMemoryStore(new AzureCognitiveSearchMemoryStore(TestConfiguration.ACS.Endpoint, TestConfiguration.ACS.ApiKey))
            .Build();

        await RunExampleAsync(memoryWithACS);
    }

    /// <summary>
    /// Stores the Markdown documents in <paramref name="memory"/> and then issues the sample queries.
    /// </summary>
    /// <param name="memory">The semantic memory to write to and search.</param>
    public static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);

        await SearchMemoryAsync(memory, "Where can I learn about zig?");

        await SearchMemoryAsync(memory, "I want to learn about Neovim");

        await SearchMemoryAsync(memory, "What do you think about user specific configuration?");

        await SearchMemoryAsync(memory, "How can I modify text to change code");
    }

    /// <summary>
    /// Searches the collection for <paramref name="query"/> and prints the id and relevance
    /// of each result (at most two, with a minimum relevance score of 0.5).
    /// </summary>
    /// <param name="memory">The semantic memory to search.</param>
    /// <param name="query">The natural-language query text.</param>
    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine("\nQuery: " + query + "\n");

        var memoryResults = memory.SearchAsync(MemoryCollectionName, query, limit: 2, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult memoryResult in memoryResults)
        {
            Console.WriteLine($"Result {++i}:");
            // The id is the document URL (see StoreMemoryAsync). The description is not
            // printed because it holds the full file content.
            Console.WriteLine(" URL: : " + memoryResult.Metadata.Id);
            Console.WriteLine(" Relevance: " + memoryResult.Relevance);
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    /// <summary>
    /// Saves every document returned by <see cref="MarkdownData"/> into the memory collection.
    /// </summary>
    /// <param name="memory">The semantic memory to write to.</param>
    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */

        Console.WriteLine("\nAdding some markdown files with their content.");
        var jmnDocs = MarkdownData();
        var i = 0;
        foreach (var entry in jmnDocs)
        {
            // Key = document URL, Value = full Markdown text. The URL is stored both as the
            // record id and as additional metadata; the text is stored both as the
            // description and as the text to embed.
            await memory.SaveInformationAsync(
                collection: MemoryCollectionName,
                description: entry.Value,
                id: entry.Key,
                text: entry.Value,
                additionalMetadata: entry.Key);

            Console.Write($" #{++i} saved.");
        }

        Console.WriteLine("\n----------------------");
    }

    /// <summary>
    /// Reads every <c>*.md</c> file under the docs directory (recursively).
    /// </summary>
    /// <returns>A dictionary mapping each document's public URL to the file's full text.</returns>
    private static Dictionary<string, string> MarkdownData()
    {
        const string filePath = "/home/jmn/docs/";
        const string urlBase = "https://docs.jmnorlund.net/";
        Dictionary<string, string> docs = new();

        foreach (string file in Directory.EnumerateFiles(filePath, "*.md", SearchOption.AllDirectories))
        {
            docs[ToDocUrl(urlBase, filePath, file)] = File.ReadAllText(file);
        }

        return docs;
    }

    /// <summary>
    /// Maps a Markdown file on disk to its URL: the path relative to <paramref name="rootPath"/>,
    /// without the file extension, appended to <paramref name="urlBase"/>.
    /// </summary>
    /// <param name="urlBase">URL prefix, expected to end with '/'.</param>
    /// <param name="rootPath">Directory the documents were enumerated from.</param>
    /// <param name="file">Full path of the Markdown file.</param>
    private static string ToDocUrl(string urlBase, string rootPath, string file)
    {
        // Only strip the root prefix and the trailing extension. A plain string.Replace
        // would also remove ".md" (or the root path text) from the middle of directory
        // or file names, e.g. "my.mdnotes/page.md" would become "mynotes/page".
        string relativePath = Path.GetRelativePath(rootPath, file);
        string withoutExtension = Path.ChangeExtension(relativePath, null);

        // URLs always use forward slashes, whatever the platform's directory separator is.
        return urlBase + withoutExtension.Replace(Path.DirectorySeparatorChar, '/');
    }
}

TypeScript search client code

I then used the (beta) JavaScript SDK to query the database.

1
import { SearchClient, AzureKeyCredential } from "@azure/search-documents";
2
import OpenAI from 'openai';
3
4
// OpenAI client used to embed the query text; created with the SDK's default options.
const openai = new OpenAI();

/**
 * Embeds `query` with OpenAI and runs a vector search against the Azure Cognitive
 * Search index configured through the environment.
 *
 * Reads ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX from `process.env`.
 *
 * @param query Natural-language query text to embed and search for.
 * @returns All search results, each restricted to the "AdditionalMetadata" field.
 * @throws Error if any of the required environment variables is missing or empty.
 */
async function doPureVectorSearch(query: string) {
  const searchServiceEndpoint = process.env.ACS_ENDPOINT;
  const searchServiceApiKey = process.env.ACS_QUERY_KEY;
  const searchIndexName = process.env.ACS_INDEX;

  // process.env values are `string | undefined`; fail early with a clear message
  // instead of handing undefined to the SDK constructors.
  if (!searchServiceEndpoint || !searchServiceApiKey || !searchIndexName) {
    throw new Error(
      "ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX environment variables must all be set"
    );
  }

  const searchClient = new SearchClient(
    searchServiceEndpoint,
    searchIndexName,
    new AzureKeyCredential(searchServiceApiKey)
  );

  // Must be the same embedding model that was used when the documents were indexed.
  const embedding = await openai.embeddings.create({
    model: "text-embedding-ada-002",
    input: query,
  });

  // "*" is passed as the search text so that, presumably, only the vector query
  // drives the ranking — NOTE(review): confirm against the SDK version in use.
  const response = await searchClient.search("*", {
    vectorQueries: [
      {
        kind: "vector",
        vector: embedding.data[0].embedding,
        fields: ["Embedding"]
      }
    ],
    select: ["AdditionalMetadata"]
  });

  // Drain the async result iterator into a plain array for the caller.
  const results = [];
  for await (const result of response.results) {
    results.push(result);
  }

  return results;
}