Files
quartz/quartz/components/scripts/util.ts
Aaron Pham f533902c75 feat: semantic search (1/n)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:50:52 -04:00

112 lines
3.8 KiB
TypeScript

export function registerEscapeHandler(outsideContainer: HTMLElement | null, cb: () => void) {
if (!outsideContainer) return
function click(this: HTMLElement, e: HTMLElementEventMap["click"]) {
if (e.target !== this) return
e.preventDefault()
e.stopPropagation()
cb()
}
function esc(e: HTMLElementEventMap["keydown"]) {
if (!e.key.startsWith("Esc")) return
e.preventDefault()
cb()
}
outsideContainer?.addEventListener("click", click)
window.addCleanup(() => outsideContainer?.removeEventListener("click", click))
document.addEventListener("keydown", esc)
window.addCleanup(() => document.removeEventListener("keydown", esc))
}
export function removeAllChildren(node: HTMLElement) {
while (node.firstChild) {
node.removeChild(node.firstChild)
}
}
// AliasRedirect emits HTML redirects which also have the link[rel="canonical"]
// containing the URL it's redirecting to.
// Extracting it here with regex is _probably_ faster than parsing the entire HTML
// with a DOMParser effectively twice (here and later in the SPA code), even if
// way less robust - we only care about our own generated redirects after all.
const canonicalRegex = /<link rel="canonical" href="([^"]*)">/
export async function fetchCanonical(url: URL): Promise<Response> {
const res = await fetch(`${url}`)
if (!res.headers.get("content-type")?.startsWith("text/html")) {
return res
}
// reading the body can only be done once, so we need to clone the response
// to allow the caller to read it if it's was not a redirect
const text = await res.clone().text()
const [_, redirect] = text.match(canonicalRegex) ?? []
return redirect ? fetch(`${new URL(redirect, url)}`) : res
}
const contextWindowWords = 30
export const tokenizeTerm = (term: string) => {
const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
const tokenLen = tokens.length
if (tokenLen > 1) {
for (let i = 1; i < tokenLen; i++) {
tokens.push(tokens.slice(0, i + 1).join(" "))
}
}
return tokens.sort((a, b) => b.length - a.length) // always highlight longest terms first
}
export function highlight(searchTerm: string, text: string, trim?: boolean) {
const tokenizedTerms = tokenizeTerm(searchTerm)
let tokenizedText = text.split(/\s+/).filter((t) => t !== "")
let startIndex = 0
let endIndex = tokenizedText.length - 1
if (trim) {
const includesCheck = (tok: string) =>
tokenizedTerms.some((term) => tok.toLowerCase().startsWith(term.toLowerCase()))
const occurrencesIndices = tokenizedText.map(includesCheck)
let bestSum = 0
let bestIndex = 0
for (let i = 0; i < Math.max(tokenizedText.length - contextWindowWords, 0); i++) {
const window = occurrencesIndices.slice(i, i + contextWindowWords)
const windowSum = window.reduce((total, cur) => total + (cur ? 1 : 0), 0)
if (windowSum >= bestSum) {
bestSum = windowSum
bestIndex = i
}
}
startIndex = Math.max(bestIndex - contextWindowWords, 0)
endIndex = Math.min(startIndex + 2 * contextWindowWords, tokenizedText.length - 1)
tokenizedText = tokenizedText.slice(startIndex, endIndex)
}
const slice = tokenizedText
.map((tok) => {
// see if this tok is prefixed by any search terms
for (const searchTok of tokenizedTerms) {
if (tok.toLowerCase().includes(searchTok.toLowerCase())) {
const regex = new RegExp(searchTok.toLowerCase(), "gi")
return tok.replace(regex, `<span class="highlight">$&</span>`)
}
}
return tok
})
.join(" ")
return `${startIndex === 0 ? "" : "..."}${slice}${
endIndex === tokenizedText.length - 1 ? "" : "..."
}`
}
// To be used with search and everything else with flexsearch
export const encode = (str: string) =>
str
.toLowerCase()
.split(/\s+/)
.filter((token) => token.length > 0)