Get Linked Data Function

By Andy Fitzgerald

Automatically retrieve images, titles, short descriptions, and more from linked resources on the web.

schemaTypes/resource/index.ts

import GetLinkedData from './linkedDataInput'

// URL of the linked resource. The default URL input is replaced by the
// GetLinkedData component, which renders the default input plus a
// "Get linked data" button.
defineField({
  name: 'resourceUrl',
  title: 'Resource URL',
  type: 'url',
  components: {
    input: GetLinkedData,
  },
}),
// Bookkeeping for the linked data request. The input component reads these
// fields and the get-linked-data function writes them.
defineField({
  name: 'ldMetadata',
  title: 'Linked Data Metadata',
  type: 'object',
  fields: [
    // Set to true by the function while a fetch is running, false when it ends.
    { name: 'ldIsUpdating', type: 'boolean' },
    // Timestamp written by the function after a successful update.
    { name: 'ldLastUpdated', type: 'datetime' },
    // Timestamp written by the input component's button; a change to this
    // value is what the blueprint filter watches for.
    { name: 'ldLastRequested', type: 'datetime' },
    // Message written by the function when a fetch fails; unset on success.
    { name: 'ldUpdateIssue', type: 'string' },
  ],
}),
// The fields below are filled in by the function when matching data is found.
defineField({
  name: 'title',
  type: 'string',
  title: 'Title',
}),
defineField({
  name: 'author',
  type: 'string',
  title: 'Author',
}),
// NOTE(review): 'publisher' is not a built-in type and is not defined in this
// snippet; the function writes to `publisher.pubName`, so it presumably is an
// object type with a `pubName` field — confirm against the project schema.
defineField({
  name: 'publisher',
  type: 'publisher',
}),
// Stored as a date only; the function keeps the part of the scraped value
// before the "T".
defineField({
  name: 'pubDate',
  type: 'date',
  title: 'Date First Published',
}),
defineField({
  name: 'metaDescription',
  type: 'text',
  title: 'Description',
  rows: 3,
}),
// Set by the function to a reference to the uploaded image asset.
defineField({
  name: 'resourceImage',
  type: 'image',
  title: 'Image',
  options: {
    hotspot: true,
  },
}),

schemaTypes/resource/linkedDataInput.tsx

import {UrlInputProps, useFormValue, useClient} from 'sanity'
import {Button, Box, Text, Spinner, Card, Flex, useToast} from '@sanity/ui'

/**
 * Props accepted by GetLinkedData: the standard URL input props plus an
 * optional path to the metadata object (the component defaults it to
 * `['ldMetadata']`).
 */
type Props = UrlInputProps & {
  metaPath?: string[] // the path of the `ldMetadata` object
}

/**
 * URL input with a "Get linked data" button and a status line.
 *
 * Renders the default URL input, then a button that stamps
 * `<metaPath>.ldLastRequested` on the current document, and finally a status
 * line driven by the `ldIsUpdating`, `ldUpdateIssue` and `ldLastUpdated`
 * values read from the form.
 *
 * @param props - Standard URL input props, plus an optional `metaPath`
 *   pointing at the metadata object (defaults to `['ldMetadata']`).
 */
export default function GetLinkedData(props: Props) {
  const {renderDefault, value, metaPath = ['ldMetadata']} = props

  const client = useClient({apiVersion: 'v2025-10-20'})
  const toast = useToast()
  const docId = useFormValue(['_id']) as string | undefined
  const isUpdating = useFormValue([...metaPath, 'ldIsUpdating']) as boolean | undefined
  const lastUpdatedISO = useFormValue([...metaPath, 'ldLastUpdated']) as string | undefined
  const updateIssue = useFormValue([...metaPath, 'ldUpdateIssue']) as string | undefined

  const lastUpdatedDate = lastUpdatedISO ? new Date(lastUpdatedISO) : undefined

  // Records a new request timestamp on the document. The toast is pushed only
  // once the outcome of the commit is known, so a failed patch is reported as
  // an error instead of as a successful request.
  const handleClick = async () => {
    if (!docId) return
    const now = new Date().toISOString()
    try {
      await client
        .patch(docId)
        .setIfMissing({[metaPath[0]]: {}})
        .set({
          [`${metaPath.join('.')}.ldLastRequested`]: now,
        })
        .commit({returnDocuments: false})
      toast.push({
        status: 'success',
        title: 'Linked Data fetch initiated.'
      })
    } catch (err) {
      console.error('[get-linked-data] request failed:', err)
      toast.push({
        status: 'error',
        title: 'Linked Data fetch could not be initiated.',
        description: err instanceof Error ? err.message : undefined,
      })
    }
  }

  return (
    <Box>
      {renderDefault(props)}
      <Card paddingTop={[3]}>
        <Button
          fontSize={[2]}
          padding={[3]}
          text="Get linked data"
          mode="ghost"
          // Also disabled while a fetch is flagged as running: a request made
          // in that state would only stamp ldLastRequested without a new fetch.
          disabled={!value || !isValidUrl(value) || isUpdating === true}
          tone="default"
          width="fill"
          onClick={handleClick}
        />
      </Card>
      <Card paddingTop={3}>
        <Flex direction="row" gap={2}>
          {isUpdating ? (
            <>
              <Spinner size={1} />
              <Text size={1} weight="medium" muted>
                Fetching linked data
              </Text>
            </>
          ) : updateIssue ? (
            <Text size={1} weight="medium" muted>
              {updateIssue}
            </Text>
          ) : (
            lastUpdatedDate && (
              <Text size={1} weight="medium" muted>
                Last updated {lastUpdatedDate.toLocaleDateString('en-US')}
              </Text>
            )
          )}
        </Flex>
      </Card>
    </Box>
  )
}

/**
 * Reports whether `url` parses as an absolute http(s) URL.
 *
 * Parsing alone would also accept schemes such as `javascript:` or `mailto:`,
 * which cannot be fetched as web pages, so the protocol is checked as well.
 *
 * @param url - Candidate URL string.
 * @returns `true` only for URLs that parse and use `http:` or `https:`.
 */
function isValidUrl(url: string) {
  try {
    const {protocol} = new URL(url)
    return protocol === 'http:' || protocol === 'https:'
  } catch {
    return false
  }
}

functions/get-linked-data/index.ts

import {createClient} from "@sanity/client";
import {documentEventHandler} from "@sanity/functions";

// Dynamic imports for CJS/ESM interop.
// Each package's default export is loaded with top-level `await`, so this file
// has to be evaluated as an ES module. The rule factories imported here are
// the ones handed to `metascraper([...])` inside the handler.
const metascraper = (await import("metascraper")).default;
const author = (await import("metascraper-author")).default;
const date = (await import("metascraper-date")).default;
const description = (await import("metascraper-description")).default;
const image = (await import("metascraper-image")).default;
const publisher = (await import("metascraper-publisher")).default;
const title = (await import("metascraper-title")).default;

// Types

/**
 * Shape the handler casts the metascraper result to. Every field is optional;
 * the handler only writes the ones that hold a non-blank value.
 */
type LinkedData = {
  author?: string;
  date?: string;
  description?: string;
  image?: string;
  publisher?: string;
  title?: string;
};

/**
 * One instruction passed as `target` to `client.agent.action.patch`: either
 * set `value` at `path`, or unset whatever is at `path`.
 */
type PatchTarget =
  | {path: string[]; operation: "set"; value: any}
  | {path: string[]; operation: "unset"};

// Patch helper

/**
 * Builds a patch function bound to `client`.
 *
 * The returned function forwards one target, or a list of targets, for a
 * single document to `client.agent.action.patch`, always using the
 * `_.schemas.production` schema id and the `noWrite` flag captured here.
 *
 * @param client - Sanity client used to issue the agent patch action.
 * @param noWrite - Forwarded unchanged as the action's `noWrite` option.
 * @returns An async function `(documentId, target)` that resolves once the
 *   patch action has completed.
 */
const patchAgent = (
  client: ReturnType<typeof createClient>,
  noWrite: boolean = false
) =>
  async (docId: string, targets: PatchTarget | PatchTarget[]): Promise<void> => {
    await client.agent.action.patch({
      schemaId: "_.schemas.production",
      documentId: docId,
      target: targets,
      noWrite,
    });
  };

// Handler

/**
 * Upper bound, in milliseconds, for each outbound request (page HTML and
 * image). Kept well below the function timeout configured in the blueprint so
 * a hanging remote server cannot end the invocation before `ldIsUpdating` has
 * been reset.
 */
const FETCH_TIMEOUT_MS = 10000;

/**
 * Starts a timer that aborts the returned signal after `ms` milliseconds.
 * Call `cancel()` once the guarded work has finished to clear the timer.
 */
const abortAfter = (ms: number) => {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ms);
  return {signal: controller.signal, cancel: () => clearTimeout(timer)};
};

/**
 * Document event handler: fetches the page at `event.data.url`, extracts
 * linked data with metascraper, and patches the results onto the document.
 *
 * Flow:
 *  1. Flag the document as updating.
 *  2. Fetch the page HTML (bounded by FETCH_TIMEOUT_MS).
 *  3. Extract author, date, description, image, publisher and title.
 *  4. Download and upload the image, if any (best effort; skipped locally).
 *  5. Build a patch containing only the values that were found.
 *  6. Apply the patch, clearing any earlier issue and the updating flag.
 *
 * Expected failures are written to `ldMetadata.ldUpdateIssue`. Any other error
 * is logged after an attempt to clear `ldMetadata.ldIsUpdating`.
 */
export const handler = documentEventHandler(async ({context, event}) => {
  const client = createClient({
    ...context.clientOptions,
    apiVersion: "vX",
    useCdn: false,
  });

  const {data} = event;
  const {local} = context; // local is true when running locally
  const patch = patchAgent(client, local);

  // Targets are cleared out for each invocation
  const targets: PatchTarget[] = [];

  // True for anything except null, undefined and blank strings.
  const has = (v: unknown) =>
    v !== null &&
    v !== undefined &&
    !(typeof v === "string" && v.trim() === "");

  // Queue a `set` only when there is a value worth writing.
  const setIf = (path: string[], value: unknown) => {
    if (has(value)) targets.push({path, operation: "set", value});
  };

  const getData = metascraper([
    author(),
    date(),
    description(),
    image(),
    publisher(),
    title(),
  ]);

  // Log to console with a common prefix
  const log = (...args: unknown[]) => console.log("[get-linked-data]", ...args);

  // Log failures to dataset & reset updating flag
  const fail = async (message: string) => {
    log("fail:", message);
    await patch(data._id, [
      {
        path: ["ldMetadata", "ldIsUpdating"],
        operation: "set",
        value: false,
      },
      {
        path: ["ldMetadata", "ldUpdateIssue"],
        operation: "set",
        value: message,
      },
    ]);
  };

  try {
    if (!has(data?.url)) {
      await fail("No URL found on document.");
      return;
    }
    // 1. Set ldIsUpdating to `true` to prevent repeat calls
    await patch(data._id, {
      path: ["ldMetadata", "ldIsUpdating"],
      operation: "set",
      value: true,
    });

    // 2. Fetch HTML (set a UA to improve success on some sites)
    let html: string;
    const pageTimeout = abortAfter(FETCH_TIMEOUT_MS);
    try {
      const res = await fetch(data.url, {
        redirect: "follow",
        signal: pageTimeout.signal,
        headers: {
          "user-agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
          accept: "text/html,application/xhtml+xml",
        },
      });
      if (!res.ok) {
        await fail(`Failed to fetch URL (${res.status} ${res.statusText}).`);
        return;
      }
      html = await res.text();
    } catch (e) {
      // Keep the underlying cause (DNS failure, timeout abort, ...) in the logs.
      log("page fetch error:", e);
      await fail("This site couldn't be reached. Please check the URL.");
      return;
    } finally {
      pageTimeout.cancel();
    }

    // 3. Extract metadata
    let ld: LinkedData = {};
    try {
      ld = (await getData({html, url: data.url})) as LinkedData;
    } catch (e) {
      log("extraction error:", e);
      await fail(
        "There was an issue extracting linked data from the page. Please check the URL."
      );
      return;
    }

    if (!Object.values(ld).some(has)) {
      await fail("No linked data was found at this URL.");
      return;
    }

    // 4. Upload image, if present, to the asset store.
    //    Skipped when running locally: document patches are sent with noWrite
    //    in that mode, so an upload would be the only write to the dataset.
    let imageAssetId: string | undefined;
    try {
      if (ld.image && !local) {
        const imgTimeout = abortAfter(FETCH_TIMEOUT_MS);
        let buffer: Buffer;
        try {
          const imgRes = await fetch(ld.image, {
            redirect: "follow",
            signal: imgTimeout.signal,
            headers: {accept: "image/*"},
          });
          // Don't upload the body of an error response as if it were an image.
          if (!imgRes.ok) {
            throw new Error(
              `Image request failed (${imgRes.status} ${imgRes.statusText}).`
            );
          }
          buffer = Buffer.from(await imgRes.arrayBuffer());
        } finally {
          imgTimeout.cancel();
        }
        const imageAsset = await client.assets.upload("image", buffer);
        imageAssetId = imageAsset._id;
      }
    } catch (err) {
      // Best effort: a missing image must not fail the whole update.
      console.warn("Image fetch/upload skipped:", err);
    }

    // 5. Build conditional patch
    setIf(["title"], ld.title);
    setIf(["author"], ld.author);
    setIf(["metaDescription"], ld.description);
    // Keep only the part before "T" for the date-only field.
    setIf(["pubDate"], ld.date ? ld.date.split("T")[0] : undefined);
    setIf(["publisher", "pubName"], ld.publisher);

    // a) Only set resourceImage if we actually uploaded one
    if (has(imageAssetId)) {
      targets.push({
        path: ["resourceImage"],
        operation: "set",
        value: {
          _type: "image",
          asset: {_type: "reference", _ref: imageAssetId},
        },
      });
    }

    // b) Unset any previously logged issues
    targets.push({
      path: ["ldMetadata", "ldUpdateIssue"],
      operation: "unset",
    });

    // c) Always update bookkeeping flags
    targets.push(
      {
        path: ["ldMetadata", "ldLastUpdated"],
        operation: "set",
        value: new Date().toISOString(),
      },
      {
        path: ["ldMetadata", "ldIsUpdating"],
        operation: "set",
        value: false,
      }
    );

    // 6. Apply the schema-aware patch
    await patch(data._id, targets);
    console.log(
      local
        ? "Linked Data (LOCAL TEST MODE - Content Lake not updated):"
        : "Linked Data:",
      ld
    );
  } catch (err) {
    // Final safety net: make sure to clear the updating flag
    try {
      await patch(data._id, {
        path: ["ldMetadata", "ldIsUpdating"],
        operation: "set",
        value: false,
      });
    } finally {
      console.error("[get-linked-data] fatal error:", err);
    }
  }
});

sanity.blueprints.ts

// Registers the get-linked-data document function and the events that run it.
defineDocumentFunction({
  type: "sanity.function.document",
  name: "get-linked-data",
  src: "./functions/get-linked-data",
  // NOTE(review): units for memory/timeout are not stated here (presumably GB
  // and seconds) — confirm against the Blueprints documentation.
  memory: 2,
  timeout: 30,
  event: {
    on: ["update", "create"],
    includeDrafts: true,
    includeAllVersions: true,
    // Matches `resource` documents when ldLastRequested changed or was newly
    // defined, or when the document was just created with a resourceUrl.
    // Documents already flagged with ldIsUpdating == true are excluded.
    filter:
      `_type == 'resource' 
      && (
        delta::changedAny(ldMetadata.ldLastRequested) 
        || (
          !defined(before().ldMetadata.ldLastRequested) 
          && defined(after().ldMetadata.ldLastRequested)
        )
        || (
          delta::operation() == 'create'
          && defined(resourceUrl)
        )
      )
      && ldMetadata.ldIsUpdating != true
      `,
    // Exposes resourceUrl under the name `url`, which is the property the
    // handler reads from `event.data`.
    projection:
      "{_id, title, 'url':resourceUrl}",
  },
}),

Most of the sites you link to on the web publish some form of queryable data about the titles, authors, descriptions, publication dates, and hero images of their resources. With this Sanity Function you can use that data to add rich descriptions, author information, and images to the links you post — all on document creation or at the press of a button.

Getting Started

View the complete source code and an example in context in the UX Methods project.

  • Initialize blueprints if you haven't already: npx sanity blueprints init
  • Add the ldMetadata object alongside your URL field and include the custom input component
  • Add the function and blueprint definition
  • Deploy your schema: npx sanity deploy
  • Deploy the blueprint: npx sanity blueprints deploy

How it Works

The function automatically fetches linked data for new resources created with a value in the Resource URL field and refreshes that data at the press of a button:

  1. Trigger the Get Linked Data function by adding a link to the Resource URL field in a new document, or by clicking the "Get linked data" button in the input component.
  2. The Get Linked Data function fetches published metadata in Open Graph, Microdata, RDFa, Twitter Card, JSON-LD, and HTML formats using the Metascraper library.
  3. Data found that matches title, author, date, description, image, or publisher fields is written to the document.
  4. The function tidies up and stores metadata about the last fetch in the document to help you ensure your link data remains up-to-date.

Key Benefits

  • Afford rich descriptions of the content you link to without having to find and copy or create them
  • Easily capture hero images for linked content
  • Allow external site owners the opportunity to maintain control over their own messaging
  • Use the data content creators publish to deliver a richer, more connected experience for your site visitors

Technical Implementation

Since this function "queries" remote resources on the web as data stores, it includes more control features and error handling checks than you'll see in basic Sanity Functions. Key elements include:

  • Linked Data Metadata Object: These four fields manage the "state" of your linked data request. State is managed in the Content Lake, not in the UI, which means that different authors will see the same state, even if they're logged in to different Studios.
  • Linked Data Input Component: Linked data is requested on document creation when a valid URL is present, but data changes and paste mishaps occur. Manually refreshing linked data when needed allows you to manage resources efficiently.

This function can be added to any schema that has an existing "URL" field—without requiring you to migrate any data. To see an example of this function integrated into a live project, check out the UX Methods project on GitHub.

Contributor

Other recipes by the contributor

Taxonomy Term Auto-Tag

Use the Sanity Embeddings Index to auto-tag resources from a pre-defined list of taxonomy terms managed in Sanity Studio.

Andy Fitzgerald
Go to Taxonomy Term Auto-Tag

Related Resources by Taxonomy Tag

Three simple approaches to generate ranked lists of related content using taxonomy tags managed in Sanity Studio

Andy Fitzgerald
Go to Related Resources by Taxonomy Tag

Import Taxonomy Terms

Import taxonomy terms, structure, and metadata into the Taxonomy Manager plugin. Includes a spreadsheet template you can use to author and correctly format your taxonomy.

Andy Fitzgerald
Go to Import Taxonomy Terms