Andy Fitzgerald
Information Architect & Content Strategist
Automatically retrieve images, titles, short descriptions, and more from linked resources on the web.
import GetLinkedData from './linkedDataInput'
// URL of the linked resource. Rendered with the custom GetLinkedData input
// (imported above) instead of the stock URL input.
defineField({
name: 'resourceUrl',
title: 'Resource URL',
type: 'url',
components: {
input: GetLinkedData,
},
}),
// Bookkeeping for linked-data fetches. The input component reads these values
// and writes ldLastRequested; the get-linked-data function writes the rest.
defineField({
name: 'ldMetadata',
title: 'Linked Data Metadata',
type: 'object',
fields: [
// Set to true by the function when a fetch starts and back to false when it ends
{ name: 'ldIsUpdating', type: 'boolean' },
// Timestamp written by the function after a successful update
{ name: 'ldLastUpdated', type: 'datetime' },
// Timestamp written by the "Get linked data" button; changing it triggers the function
{ name: 'ldLastRequested', type: 'datetime' },
// Failure message written by the function; unset again on the next successful update
{ name: 'ldUpdateIssue', type: 'string' },
],
}),
// The fields below are the ones the function fills in from the scraped linked data.
defineField({
name: 'title',
type: 'string',
title: 'Title',
}),
defineField({
name: 'author',
type: 'string',
title: 'Author',
}),
// Custom `publisher` type (declared outside this snippet); the function sets its `pubName` member.
defineField({
name: 'publisher',
type: 'publisher',
}),
// Receives only the date portion (text before "T") of the scraped date.
defineField({
name: 'pubDate',
type: 'date',
title: 'Date First Published',
}),
defineField({
name: 'metaDescription',
type: 'text',
title: 'Description',
rows: 3,
}),
// Set by the function only when an image asset was actually uploaded.
defineField({
name: 'resourceImage',
type: 'image',
title: 'Image',
options: {
hotspot: true,
},
}),import {UrlInputProps, useFormValue, useClient} from 'sanity'
import {Button, Box, Text, Spinner, Card, Flex, useToast} from '@sanity/ui'
/**
 * Props accepted by the GetLinkedData input: the standard URL input props plus
 * an optional path to the metadata object the component reads and patches.
 * The component falls back to `['ldMetadata']` when `metaPath` is omitted.
 */
type Props = UrlInputProps & {
metaPath?: string[] // the path of the `ldMetadata` object
}
/**
 * Custom input for a URL field that adds a "Get linked data" button and a
 * status line beneath the default URL input.
 *
 * Clicking the button stamps `<metaPath>.ldLastRequested` on the current
 * document with the current time. The status line shows, in priority order:
 * a spinner while `ldIsUpdating` is true, the stored `ldUpdateIssue` message,
 * or the date of the last successful update.
 *
 * @param props Standard URL input props plus an optional `metaPath`
 *   (defaults to `['ldMetadata']`).
 */
export default function GetLinkedData(props: Props) {
  const {renderDefault, value, metaPath = ['ldMetadata']} = props
  const client = useClient({apiVersion: 'v2025-10-20'})
  const toast = useToast()

  const docId = useFormValue(['_id']) as string | undefined
  const isUpdating = useFormValue([...metaPath, 'ldIsUpdating']) as boolean | undefined
  const lastUpdatedISO = useFormValue([...metaPath, 'ldLastUpdated']) as string | undefined
  const updateIssue = useFormValue([...metaPath, 'ldUpdateIssue']) as string | undefined

  // Treat an unparseable timestamp as "never updated" instead of rendering "Invalid Date".
  const parsedLastUpdated = lastUpdatedISO ? new Date(lastUpdatedISO) : undefined
  const lastUpdatedDate =
    parsedLastUpdated && !Number.isNaN(parsedLastUpdated.getTime())
      ? parsedLastUpdated
      : undefined

  const handleClick = async () => {
    if (!docId) return
    const now = new Date().toISOString()
    try {
      await client
        .patch(docId)
        .setIfMissing({[metaPath[0]]: {}})
        .set({
          [`${metaPath.join('.')}.ldLastRequested`]: now,
        })
        .commit({returnDocuments: false})
      // Only report success once the request has actually been written.
      toast.push({
        status: 'success',
        title: 'Linked Data fetch initiated.',
      })
    } catch (err) {
      // Surface the failure instead of leaving an unhandled rejection behind.
      toast.push({
        status: 'error',
        title: 'Linked Data fetch could not be initiated.',
        description: err instanceof Error ? err.message : String(err),
      })
    }
  }

  // While a fetch is flagged as running, a new request would not start another
  // one, so the button is disabled rather than reporting a misleading success.
  const isButtonDisabled = !value || !isValidUrl(value) || isUpdating === true

  return (
    <Box>
      {renderDefault(props)}
      <Card paddingTop={[3]}>
        <Button
          fontSize={[2]}
          padding={[3]}
          text="Get linked data"
          mode="ghost"
          disabled={isButtonDisabled}
          tone="default"
          width="fill"
          onClick={handleClick}
        />
      </Card>
      <Card paddingTop={3}>
        <Flex direction="row" gap={2}>
          {isUpdating ? (
            <>
              <Spinner size={1} />
              <Text size={1} weight="medium" muted>
                Fetching linked data
              </Text>
            </>
          ) : updateIssue ? (
            <Text size={1} weight="medium" muted>
              {updateIssue}
            </Text>
          ) : (
            lastUpdatedDate && (
              <Text size={1} weight="medium" muted>
                Last updated {lastUpdatedDate.toLocaleDateString('en-US')}
              </Text>
            )
          )}
        </Flex>
      </Card>
    </Box>
  )
}
/**
 * Reports whether `url` can be parsed as an absolute URL by the WHATWG `URL`
 * constructor. Relative paths and empty strings are rejected.
 */
function isValidUrl(url: string) {
  let parsable = true
  try {
    // The constructor throws on anything it cannot parse; the instance itself is not needed.
    void new URL(url)
  } catch {
    parsable = false
  }
  return parsable
}
import {createClient} from "@sanity/client";
import {documentEventHandler} from "@sanity/functions";
// Dynamic imports for CJS/ESM interop
const metascraper = (await import("metascraper")).default;
const author = (await import("metascraper-author")).default;
const date = (await import("metascraper-date")).default;
const description = (await import("metascraper-description")).default;
const image = (await import("metascraper-image")).default;
const publisher = (await import("metascraper-publisher")).default;
const title = (await import("metascraper-title")).default;
// Types
/**
 * Metadata extracted from a page by the scraper. Every property is an
 * optional string; a property is absent when nothing was found for it.
 */
type LinkedData = Partial<
  Record<
    "author" | "date" | "description" | "image" | "publisher" | "title",
    string
  >
>;
/**
 * One operation handed to the patch helper: either set `value` at `path`,
 * or unset whatever is stored at `path`. `path` is a list of field names
 * leading from the document root to the target field.
 */
type PatchTarget =
| {path: string[]; operation: "set"; value: any}
| {path: string[]; operation: "unset"};
/**
 * Patch helper.
 *
 * Binds a client (and a `noWrite` switch) and returns an async function that
 * sends one or more patch targets for a document through the client's agent
 * patch action, always against the "_.schemas.production" schema.
 *
 * @param client  Client whose `agent.action.patch` is called.
 * @param noWrite Forwarded unchanged to the patch action; defaults to `false`.
 * @returns Function taking a document id and one target or a list of targets.
 */
const patchAgent = (
  client: ReturnType<typeof createClient>,
  noWrite: boolean = false
) =>
  async (documentId: string, target: PatchTarget | PatchTarget[]) => {
    await client.agent.action.patch({
      noWrite,
      target,
      documentId,
      schemaId: "_.schemas.production",
    });
  };
// Handler
/**
 * Document event handler for the `get-linked-data` function.
 *
 * Reads `_id` and `url` from the event data, scrapes linked data (author,
 * date, description, image, publisher, title) from that page, uploads the
 * page image as an asset when one is found, and applies a single patch that
 * sets only the values that were actually found.
 *
 * Failures are written to `ldMetadata.ldUpdateIssue`, and
 * `ldMetadata.ldIsUpdating` is reset on every exit path.
 */
export const handler = documentEventHandler(async ({context, event}) => {
  // Upper bound (ms) for each outbound HTTP request. Without one, a remote
  // host that never answers keeps the invocation waiting; if the invocation
  // is cut off before the flag is reset, `ldIsUpdating` stays true and the
  // trigger filter keeps treating the document as busy.
  const FETCH_TIMEOUT_MS = 10000;

  const client = createClient({
    ...context.clientOptions,
    apiVersion: "vX",
    useCdn: false,
  });
  const {data} = event;
  const {local} = context; // local is true when running locally
  const patch = patchAgent(client, local);

  // Targets are collected per invocation and applied in one patch at the end
  const targets: PatchTarget[] = [];

  // True for anything except null, undefined and blank strings
  const has = (v: unknown) =>
    v !== null &&
    v !== undefined &&
    !(typeof v === "string" && v.trim() === "");

  // Queue a `set` only when there is a real value to write
  const setIf = (path: string[], value: unknown) => {
    if (has(value)) targets.push({path, operation: "set", value});
  };

  const getData = metascraper([
    author(),
    date(),
    description(),
    image(),
    publisher(),
    title(),
  ]);

  // Log failures to console
  const log = (...args: unknown[]) => console.log("[get-linked-data]", ...args);

  // Log failures to dataset & reset updating flag
  const fail = async (message: string) => {
    log("fail:", message);
    await patch(data._id, [
      {
        path: ["ldMetadata", "ldIsUpdating"],
        operation: "set",
        value: false,
      },
      {
        path: ["ldMetadata", "ldUpdateIssue"],
        operation: "set",
        value: message,
      },
    ]);
  };

  try {
    if (!has(data?.url)) {
      await fail("No URL found on document.");
      return;
    }

    // 1. Set ldIsUpdating to `true` to prevent repeat calls
    await patch(data._id, {
      path: ["ldMetadata", "ldIsUpdating"],
      operation: "set",
      value: true,
    });

    // 2. Fetch HTML (set a UA to improve success on some sites)
    let html: string;
    try {
      const res = await fetch(data.url, {
        redirect: "follow",
        headers: {
          "user-agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
          accept: "text/html,application/xhtml+xml",
        },
        // A timeout rejects the fetch (or the body read) and lands in the catch below
        signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
      });
      if (!res.ok) {
        await fail(`Failed to fetch URL (${res.status} ${res.statusText}).`);
        return;
      }
      html = await res.text();
    } catch {
      await fail("This site couldn't be reached. Please check the URL.");
      return;
    }

    // 3. Extract metadata
    let ld: LinkedData = {};
    try {
      ld = (await getData({html, url: data.url})) as LinkedData;
    } catch {
      await fail(
        "There was an issue extracting linked data from the page. Please check the URL."
      );
      return;
    }
    if (!Object.values(ld).some(has)) {
      await fail("No linked data was found at this URL.");
      return;
    }

    // 4. Upload image, if present, to the asset store.
    //    Image problems are best-effort: they are logged and the remaining
    //    fields are still patched.
    let imageAssetId: string | undefined;
    try {
      // Skipped in local test mode: the upload is a real write, which would
      // contradict the "Content Lake not updated" promise logged below.
      if (ld.image && !local) {
        const imgRes = await fetch(ld.image, {
          redirect: "follow",
          headers: {accept: "image/*"},
          signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
        });
        // Never upload the body of an error response as if it were the image
        if (!imgRes.ok) {
          throw new Error(
            `Image request failed (${imgRes.status} ${imgRes.statusText}).`
          );
        }
        const arrayBuffer = await imgRes.arrayBuffer();
        const buffer = Buffer.from(arrayBuffer);
        const imageAsset = await client.assets.upload("image", buffer);
        imageAssetId = imageAsset._id;
      }
    } catch (err) {
      console.warn("Image fetch/upload skipped:", err);
    }

    // 5) Build conditional patch
    setIf(["title"], ld.title);
    setIf(["author"], ld.author);
    setIf(["metaDescription"], ld.description);
    // Keep only the date portion of the scraped timestamp
    setIf(["pubDate"], ld.date ? ld.date.split("T")[0] : undefined);
    setIf(["publisher", "pubName"], ld.publisher);

    // a) Only set resourceImage if we actually uploaded one
    if (has(imageAssetId)) {
      targets.push({
        path: ["resourceImage"],
        operation: "set",
        value: {
          _type: "image",
          asset: {_type: "reference", _ref: imageAssetId},
        },
      });
    }

    // b) Unset any previously logged issues
    targets.push({
      path: ["ldMetadata", "ldUpdateIssue"],
      operation: "unset",
    });

    // c) Always update bookkeeping flags
    targets.push(
      {
        path: ["ldMetadata", "ldLastUpdated"],
        operation: "set",
        value: new Date().toISOString(),
      },
      {
        path: ["ldMetadata", "ldIsUpdating"],
        operation: "set",
        value: false,
      }
    );

    // 6) apply the schema-aware patch
    await patch(data._id, targets);
    console.log(
      local
        ? "Linked Data (LOCAL TEST MODE - Content Lake not updated):"
        : "Linked Data:",
      ld
    );
  } catch (err) {
    // Final safety net: make sure to clear the updating flag
    try {
      await patch(data._id, {
        path: ["ldMetadata", "ldIsUpdating"],
        operation: "set",
        value: false,
      });
    } finally {
      console.error("[get-linked-data] fatal error:", err);
    }
  }
});
defineDocumentFunction({
type: "sanity.function.document",
name: "get-linked-data",
src: "./functions/get-linked-data",
// Resource limits for each invocation.
// NOTE(review): units are not shown here — presumably GB and seconds; confirm against the Blueprints docs.
memory: 2,
timeout: 30,
event: {
// Fire on document creation as well as on updates, including drafts and versions
on: ["update", "create"],
includeDrafts: true,
includeAllVersions: true,
// Match `resource` documents when ldLastRequested changed or became defined,
// or when a document is created with a resourceUrl already set.
// Documents whose ldIsUpdating flag is true are excluded.
filter:
`_type == 'resource'
&& (
delta::changedAny(ldMetadata.ldLastRequested)
|| (
!defined(before().ldMetadata.ldLastRequested)
&& defined(after().ldMetadata.ldLastRequested)
)
|| (
delta::operation() == 'create'
&& defined(resourceUrl)
)
)
&& ldMetadata.ldIsUpdating != true
`,
// Shape of the event data given to the handler; resourceUrl is exposed as `url`
projection:
"{_id, title, 'url':resourceUrl}",
},
}),
Most of the sites you link to on the web publish some form of queryable data about the titles, authors, descriptions, publication dates, and hero images of their resources. With this Sanity Function you can use that data to add rich descriptions, author information, and images to the links you post — all on document creation or at the press of a button.
Getting Started
View the complete source code and an example in context in the UX Methods project.
npx sanity blueprints init
Add the ldMetadata object alongside your URL field and include the custom input component
npx sanity deploy
npx sanity blueprints deploy
How it Works
The function automatically fetches linked data for new resources created with a value in the Resource URL field and refreshes that data at the press of a button:
Key Benefits
Technical Implementation
Since this function "queries" remote resources on the web as data stores, it includes more control features and error handling checks than you'll see in basic Sanity Functions. Key elements include:
This function can be added to any schema that has an existing "URL" field—without requiring you to migrate any data. To see an example of this function integrated into a live project, check out the UX Methods project on GitHub.
Information Architect & Content Strategist
Use the Sanity Embeddings Index to auto-tag resources from a pre-defined list of taxonomy terms managed in Sanity Studio.
Go to Taxonomy Term Auto-Tag
Three simple approaches to generate ranked lists of related content using taxonomy tags managed in Sanity Studio.
Go to Related Resources by Taxonomy Tag
Import taxonomy terms, structure, and metadata into the Taxonomy Manager plugin. Includes a spreadsheet template you can use to author and correctly format your taxonomy.
Go to Import Taxonomy Terms
Custom input component with a DIY webhook for connecting to APIs beyond publish, update, and delete events.
Go to Custom Input Component with Webhook