Skip to content

Vector database search

How I built a search function using a vector database and Azure Cognitive Search.

First, I created a search service in Azure Cognitive Search.

I then used the following C# code to create embeddings from the Markdown files of my docs and upload them to the vector database.

using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.AI.OpenAI;
using Microsoft.SemanticKernel.Connectors.Memory.AzureCognitiveSearch;
using Microsoft.SemanticKernel.Memory;
using Microsoft.SemanticKernel.Plugins.Memory;
using RepoUtils;
/// <summary>
/// Reads a folder of Markdown documents, saves them into a semantic memory backed by
/// Azure Cognitive Search, and then runs a handful of sample queries against that memory.
/// </summary>
public static class Example14_SemanticMemory
{
    /// <summary>Name of the memory collection that every save and search call targets.</summary>
    private const string MemoryCollectionName = "SKJmnDocs";

    /// <summary>
    /// Entry point: builds a memory that uses OpenAI's "text-embedding-ada-002" model for
    /// embeddings and an <c>AzureCognitiveSearchMemoryStore</c> for storage, then runs the example.
    /// </summary>
    public static async Task RunAsync()
    {
        Console.WriteLine("==============================================================");
        Console.WriteLine("======== Semantic Memory using Azure Cognitive Search ========");
        Console.WriteLine("==============================================================");

        var memoryWithACS = new MemoryBuilder()
            .WithLoggerFactory(ConsoleLogger.LoggerFactory)
            .WithOpenAITextEmbeddingGenerationService("text-embedding-ada-002", TestConfiguration.OpenAI.ApiKey)
            .WithMemoryStore(new AzureCognitiveSearchMemoryStore(TestConfiguration.ACS.Endpoint, TestConfiguration.ACS.ApiKey))
            .Build();

        await RunExampleAsync(memoryWithACS);
    }

    /// <summary>
    /// Stores the Markdown documents in <paramref name="memory"/> and then issues four
    /// sample queries against it, printing the matches for each.
    /// </summary>
    /// <param name="memory">The semantic memory to write to and search.</param>
    public static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);
        await SearchMemoryAsync(memory, "Where can I learn about zig?");
        await SearchMemoryAsync(memory, "I want to learn about Neovim");
        await SearchMemoryAsync(memory, "What do you think about user specific configuration?");
        await SearchMemoryAsync(memory, "How can I modify text to change code");
    }

    /// <summary>
    /// Searches the collection for <paramref name="query"/> and prints the id and relevance
    /// of each match. At most two results with a relevance score of at least 0.5 are requested.
    /// </summary>
    /// <param name="memory">The semantic memory to search.</param>
    /// <param name="query">Natural-language query text.</param>
    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine("\nQuery: " + query + "\n");

        var memoryResults = memory.SearchAsync(MemoryCollectionName, query, limit: 2, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult memoryResult in memoryResults)
        {
            Console.WriteLine($"Result {++i}:");
            // The record id is the document URL (see StoreMemoryAsync), hence the label.
            Console.WriteLine(" URL: : " + memoryResult.Metadata.Id);
            Console.WriteLine(" Relevance: " + memoryResult.Relevance);
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    /// <summary>
    /// Saves every document returned by <see cref="MarkdownData"/> into the collection,
    /// one record per file, printing a progress marker after each save.
    /// </summary>
    /// <param name="memory">The semantic memory to write to.</param>
    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */
        Console.WriteLine("\nAdding some markdown files with their content.");

        var jmnDocs = MarkdownData();
        var i = 0;
        foreach (var entry in jmnDocs)
        {
            // Key = public URL of the document, Value = full Markdown text.
            // The URL is stored both as the record id and as the additional metadata;
            // the full text is stored both as the description and as the text.
            await memory.SaveInformationAsync(
                collection: MemoryCollectionName,
                description: entry.Value,
                id: entry.Key,
                text: entry.Value,
                additionalMetadata: entry.Key);
            Console.Write($" #{++i} saved.");
        }

        Console.WriteLine("\n----------------------");
    }

    /// <summary>
    /// Recursively reads every <c>*.md</c> file below the docs folder.
    /// </summary>
    /// <returns>
    /// A dictionary mapping each document's public URL (the URL base plus the file's path
    /// relative to the docs folder, without the file extension) to the file's full text.
    /// </returns>
    private static Dictionary<string, string> MarkdownData()
    {
        const string filePath = "/home/jmn/docs/";
        const string urlBase = "https://docs.jmnorlund.net/";

        Dictionary<string, string> docs = new();
        foreach (string file in Directory.EnumerateFiles(filePath, "*.md", SearchOption.AllDirectories))
        {
            // Strip only the root prefix and only the trailing extension. A blanket
            // string.Replace would also remove ".md" (or the root text) from the middle
            // of a path, e.g. "my.mdnotes/a.md" or "a.md.md", yielding a wrong URL.
            string relativePath = Path.GetRelativePath(filePath, file);
            string urlPath = Path.ChangeExtension(relativePath, null)
                .Replace(Path.DirectorySeparatorChar, '/'); // URLs always use forward slashes

            docs[urlBase + urlPath] = File.ReadAllText(file);
        }

        return docs;
    }
}

I then used the (beta) JavaScript SDK to query the database.

import { SearchClient, AzureKeyCredential } from "@azure/search-documents";
import OpenAI from 'openai';
// Module-level OpenAI client, used below to create the query embedding.
// NOTE(review): constructed with no options — presumably the SDK reads its API key
// from the environment; confirm against the openai package documentation.
const openai = new OpenAI();
/**
 * Embeds `query` with OpenAI's "text-embedding-ada-002" model and runs a vector query
 * against the "Embedding" field of the configured search index.
 *
 * Connection settings are read from the environment variables ACS_ENDPOINT,
 * ACS_QUERY_KEY and ACS_INDEX.
 *
 * @param query Natural-language search text to embed.
 * @returns All search results, each restricted to the "AdditionalMetadata" field.
 * @throws Error if any of the three environment variables is missing or empty.
 */
async function doPureVectorSearch(query: string) {
  const searchServiceEndpoint = process.env.ACS_ENDPOINT;
  const searchServiceApiKey = process.env.ACS_QUERY_KEY;
  const searchIndexName = process.env.ACS_INDEX;

  // process.env values are `string | undefined`; fail fast with a clear message instead
  // of handing `undefined` to the SDK constructors.
  if (!searchServiceEndpoint || !searchServiceApiKey || !searchIndexName) {
    throw new Error(
      "ACS_ENDPOINT, ACS_QUERY_KEY and ACS_INDEX environment variables must all be set"
    );
  }

  const searchClient = new SearchClient(
    searchServiceEndpoint,
    searchIndexName,
    new AzureKeyCredential(searchServiceApiKey)
  );

  const embedding = await openai.embeddings.create({
    model: "text-embedding-ada-002",
    input: query,
  });

  const response = await searchClient.search("*", {
    vectorQueries: [
      {
        kind: "vector",
        vector: embedding.data[0].embedding,
        fields: ["Embedding"]
      }
    ],
    select: ["AdditionalMetadata"]
  });

  // The result set is an async iterable; drain it into a plain array for the caller.
  const results = [];
  for await (const result of response.results) {
    results.push(result);
  }
  return results;
}