Skip to content

Commit

Permalink
metadata creation script setup to work as a module and added to doc conversion script
Browse files Browse the repository at this point in the history
  • Loading branch information
dougchestnut committed May 9, 2024
1 parent b8cfcde commit 6d42e36
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 86 deletions.
2 changes: 1 addition & 1 deletion apps/bov-minutes/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 0",
"import-docs": "doc2md import/* --outdir minutes --format",
"import-docs": "doc2md import/* --outdir minutes --format --metadata",
"index-docs": "createSearchIndex --outputIndex ./searchIndex.json --inputDir ./minutes --indexType flexsearch",
"build": "pnpm import-docs && pnpm index-docs && NODE_OPTIONS=\"--max-old-space-size=8192\" npx eleventy",
"serve": "NODE_OPTIONS=\"--max-old-space-size=8192\" npx eleventy --serve",
Expand Down
30 changes: 26 additions & 4 deletions packages/doc-conversion-wrapper/convert.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import saxonJs from 'saxon-js';
import sharp from 'sharp';
import { fileURLToPath } from 'url';
import { formatMarkdown as mdAssistantFormat } from '@uvalib/markdown-assistant/index.js';
import { processMarkdown as addMetadata } from '@uvalib/markdown-assistant/metadata.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url)); // Define __dirname for ES Modules
const turndownService = new TurndownService();
Expand All @@ -29,6 +30,11 @@ const argv = yargs(hideBin(process.argv))
type: 'boolean',
description: 'Format the markdown using OpenAI'
})
.option("metadata", {
alias: "m",
type: "boolean",
description: "Add metadata to the markdown file"
})
.argv;

const docPaths = argv._;
Expand Down Expand Up @@ -93,9 +99,17 @@ docPaths.forEach(async docPath => {
output: newFilePath
}).catch(err => { console.error(err); })
console.log("Markdown formatted with OpenAI.");
} else {
console.log(`Markdown file created at ${newFilePath}`);
}
if (argv.metadata) {
await addMetadata({
filePath: newFilePath,
output: newFilePath,
embed: true
}).catch(err => { console.error(err); })
console.log("Metadata added to markdown.");
}
console.log(`Markdown file created at ${newFilePath}`);

})
.catch(err => {
console.error('An error occurred:', err);
Expand Down Expand Up @@ -185,9 +199,17 @@ docPaths.forEach(async docPath => {
output: newFilePath
}).catch(err => { console.error(err); })
console.log("Markdown formatted with OpenAI.");
} else {
console.log(`Markdown file created at ${newFilePath}`);
}
if (argv.metadata) {
await addMetadata({
filePath: newFilePath,
output: newFilePath,
embed: true
}).catch(err => { console.error(err); })
console.log("Metadata added to markdown.");
}
console.log(`Markdown file created at ${newFilePath}`);

} catch (err) {
console.error('An error occurred during transformation:', err);
throw err;
Expand Down
87 changes: 73 additions & 14 deletions packages/markdown-assistant/meta2.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,26 @@
{
"@context": "http://schema.org",
"@type": "Event",
"name": "Board of Visitors Meeting, December 2022",
"name": "Board of Visitors Meeting",
"startDate": "2022-12-08",
"endDate": "2022-12-09",
"location": {
"@type": "Place",
"name": "University of Virginia",
"address": {
"@type": "PostalAddress",
"streetAddress": "1827 University Ave",
"addressLocality": "Charlottesville",
"addressRegion": "VA",
"postalCode": "22903",
"addressCountry": "USA"
}
},
"organizer": {
"@type": "Organization",
"name": "University of Virginia",
"url": "https://www.virginia.edu"
"url": "http://www.virginia.edu"
},
"keywords": "Board of Visitors, UVA, meeting, resolutions, policies",
"description": "The Board of Visitors of the University of Virginia met to adopt and discuss various resolutions concerning university policies, appointments, and memorials.",
"keywords": "University of Virginia, Board of Visitors, Meeting, Resolutions, Education",
"description": "Meeting of the University of Virginia Board of Visitors where various resolutions were adopted and other university matters were discussed.",
"attendee": [
{
"@type": "Person",
Expand Down Expand Up @@ -84,7 +82,8 @@
},
{
"@type": "Person",
"name": "James V. Reyes"
"name": "James V. Reyes",
"additionalName": "via Zoom"
},
{
"@type": "Person",
Expand All @@ -100,23 +99,83 @@
},
{
"@type": "Person",
"name": "Angela H. Mangano",
"description": "Absent"
"name": "James E. Ryan",
"jobTitle": "President"
},
{
"@type": "Person",
"name": "Ian B. Baucom"
},
{
"@type": "Person",
"name": "Jennifer Wagner Davis"
},
{
"@type": "Person",
"name": "Susan G. Harris"
},
{
"@type": "Person",
"name": "Cliff Iler"
},
{
"@type": "Person",
"name": "Melody S. Bianchetto"
},
{
"@type": "Person",
"name": "Kennedy Kipps"
},
{
"@type": "Person",
"name": "Mark Luellen"
},
{
"@type": "Person",
"name": "David Martel"
},
{
"@type": "Person",
"name": "Clark L. 'Chip' Murray"
},
{
"@type": "Person",
"name": "Margaret Grundy Noland"
},
{
"@type": "Person",
"name": "Debra D. Rinker"
},
{
"@type": "Person",
"name": "Margot M. Rogers"
},
{
"@type": "Person",
"name": "Jerilyn Teahan"
},
{
"@type": "Person",
"name": "Tish Jennings",
"jobTitle": "Presenter"
},
{
"@type": "Person",
"name": "Ms. Lily A. Roberts",
"jobTitle": "Presenter"
}
],
"about": [
{
"@type": "Thing",
"name": "Memorial Resolutions",
"description": "Resolutions to commemorate Thomas A. Saunders III and Arnold Harrison Leon."
"name": "Memorial Resolution for Thomas A. Saunders III"
},
{
"@type": "Thing",
"name": "Faculty Appointments and Promotions",
"description": "Decisions on faculty appointments, promotions, and reappointments."
"name": "Memorial Resolution for Arnold Harrison Leon"
}
],
"url": "https://www.virginia.edu/bov/meetings"
"url": "http://www.virginia.edu/boardofvisitors/meetings"
}
</script>

Expand Down
133 changes: 66 additions & 67 deletions packages/markdown-assistant/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ const loadEnv = async () => {
}
};

const processMarkdown = async (apiKey, filePath, instruction, embed) => {
export const processMarkdown = async ({ apiKey, filePath, instruction, embed, output }) => {
try {
const openai = new OpenAI({
apiKey: apiKey
});

// Explicit instruction modification for better heading handling
// Enhance instruction with metadata rules
instruction += `
The metadata should take the following form:
${"```"}
Expand All @@ -38,47 +38,37 @@ ${"```"}
"name": "...",
"startDate": "...",
"endDate": "...",
"location": {...}
},
"location": {...},
"organizer": {...},
"keywords": "keyword1, keyword2, ...",
"description": "...",
"attendee": [...],
"about": [{...},{...}]
},
"about": [{...},{...}],
"url": "..."
}
${"```"}
The following rules must be followed or a litter of kittens will be killed! That would be terrible as you love kittens (you are a cat person). Please don't kill the kittens!!!
Rules:
- Only return the metadata for this document!
- Never make up if the data is not in the document or is not known to be true.
The following rules must be followed:
- Only return the metadata for this document!
- Never make up data if it is not in the document or not known to be true.
- Be sure to list all of the attendees.
- Please be as thorough as possible and use your knowledge of UVA to extend the metadata!
- Don't try to format the data that you return by wrapping it in '${"```"}'
- Please be as thorough as possible.
- Ensure that you return valid json-ld using the schema.org vocabulary.
- Do not wrap the metadata in ${"```"}.
`;

let markdownContent = await fs.readFile(filePath, 'utf8');
const prompt = `${instruction}\n\nHere is the markdown:\n${"```"}${markdownContent}${"```"}`;

const response = await openai.chat.completions.create({
model: "gpt-4-turbo", // Confirm this is the correct model identifier
// model: "gpt-4",
temperature: .8,
messages: [{
role: 'user',
content: prompt
}],

model: "gpt-4-turbo",
temperature: 0.8,
messages: [{ role: 'user', content: prompt }],
});

// Access the content of the first choice's message
let content = response.choices[0].message.content;

if (embed) {
// Embed the JSON-LD metadata into the original markdown content
content = `\n<script type="application/ld+json">\n${content}\n</script>\n` + markdownContent;
}

Expand All @@ -87,58 +77,67 @@ Rules:
.use(stringify)
.process(content);

return String(parsedMarkdown);
const finalContent = String(parsedMarkdown);

if (output) {
await fs.writeFile(output, finalContent);
console.log(`Output written to ${output}`);
}

return finalContent;
} catch (error) {
console.error('Error processing markdown:', error);
throw error;
}
};


const main = async () => {
const envLoaded = await loadEnv();
if (!envLoaded || !process.env.OPENAI_API_KEY) {
console.error('API key is missing.');
return;
}

const argv = yargs(hideBin(process.argv))
.option('file', {
alias: 'f',
describe: 'Path to the markdown file',
type: 'string',
demandOption: true
})
.option('instruction', {
alias: 'i',
describe: 'Instruction to process the markdown',
type: 'string',
default: 'You are a metadata expert that specializes in schema.org and json-ld. You are very knowledgeable about the University of Virginia'
})
.option('output', {
alias: 'o',
describe: 'Output file path',
type: 'string'
})
.option('embed', {
alias: 'e',
type: 'boolean',
describe: 'Embed JSON-LD metadata into the original markdown',
default: false
})
.parse();

try {
const result = await processMarkdown(process.env.OPENAI_API_KEY, argv.file, argv.instruction, argv.embed);
if (argv.output) {
await fs.writeFile(argv.output, result);
console.log(`Output written to ${argv.output}`);
} else {
console.log(result);
if (import.meta.url === `file://${process.argv[1]}`) {
const main = async () => {
const envLoaded = await loadEnv();
if (!envLoaded || !process.env.OPENAI_API_KEY) {
console.error('API key is missing.');
return;
}
} catch (error) {
console.error('Failed to execute script:', error);
}
};

main();
const argv = yargs(hideBin(process.argv))
.option('file', {
alias: 'f',
describe: 'Path to the markdown file',
type: 'string',
demandOption: true
})
.option('instruction', {
alias: 'i',
describe: 'Instruction to process the markdown',
type: 'string',
default: 'You are a metadata expert that specializes in schema.org and json-ld. You are very knowledgeable about the University of Virginia.'
})
.option('output', {
alias: 'o',
describe: 'Output file path',
type: 'string'
})
.option('embed', {
alias: 'e',
type: 'boolean',
describe: 'Embed JSON-LD metadata into the original markdown',
default: false
})
.parse();

await processMarkdown({
apiKey: process.env.OPENAI_API_KEY,
filePath: argv.file,
instruction: argv.instruction,
embed: argv.embed,
output: argv.output
}).catch(error => {
console.error('Failed to execute script:', error);
});
};

main();
}

0 comments on commit 6d42e36

Please sign in to comment.