diff --git a/.gitignore b/.gitignore index b2396ba..a17500f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,149 +1,152 @@ -# Logs -logs -*.log -npm-debug.log* -yarn-debug.log* -yarn-error.log* -lerna-debug.log* -.pnpm-debug.log* - -# Diagnostic reports (https://nodejs.org/api/report.html) -report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json - -# Runtime data -pids -*.pid -*.seed -*.pid.lock - -# Directory for instrumented libs generated by jscoverage/JSCover -lib-cov - -# Coverage directory used by tools like istanbul -coverage -*.lcov - -# nyc test coverage -.nyc_output - -# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) -.grunt - -# Bower dependency directory (https://bower.io/) -bower_components - -# node-waf configuration -.lock-wscript - -# Compiled binary addons (https://nodejs.org/api/addons.html) -build/Release - -# Dependency directories -node_modules/ -jspm_packages/ - -# Snowpack dependency directory (https://snowpack.dev/) -web_modules/ - -# TypeScript cache -*.tsbuildinfo - -# Optional npm cache directory -.npm - -# Optional eslint cache -.eslintcache - -# Optional stylelint cache -.stylelintcache - -# Microbundle cache -.rpt2_cache/ -.rts2_cache_cjs/ -.rts2_cache_es/ -.rts2_cache_umd/ - -# Optional REPL history -.node_repl_history - -# Output of 'npm pack' -*.tgz - -# Yarn Integrity file -.yarn-integrity - -# dotenv environment variable files -.env -.env.development.local -.env.test.local -.env.production.local -.env.local - -# parcel-bundler cache (https://parceljs.org/) -.cache -.parcel-cache - -# Next.js build output -.next -out - -# Nuxt.js build / generate output -.nuxt -dist - -# Gatsby files -.cache/ -# Comment in the public line in if your project uses Gatsby and not Next.js -# https://nextjs.org/blog/next-9-1#public-directory-support -# public - -# vuepress build output -.vuepress/dist - -# vuepress v2.x temp and cache directory -.temp -.cache - -# Docusaurus cache and generated files -.docusaurus - -# Serverless directories -.serverless/ - -# FuseBox cache -.fusebox/ - -# DynamoDB Local files -.dynamodb/ - -# TernJS port file -.tern-port - -# Stores VSCode versions used for testing VSCode extensions -.vscode-test - -# yarn v2 -.yarn/cache -.yarn/unplugged -.yarn/build-state.yml -.yarn/install-state.gz -.pnp.* - -# vscode -.vscode - -# analytics -analyze/ - -# heapsnapshot -*.heapsnapshot - -# turbo -.turbo/ - -# pagefind postbuild -public/_pagefind/ -public/sitemap.xml - -# npm package lock file for different platforms -package-lock.json \ No newline at end of file +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# vscode +.vscode + +# analytics +analyze/ + +# heapsnapshot +*.heapsnapshot + +# turbo +.turbo/ + +# pagefind postbuild +public/_pagefind/ +public/sitemap.xml + +# npm package lock file for different platforms +package-lock.json + +# generated +mcp-worker/generated \ No newline at end of file diff --git a/mcp-worker/src/index.mjs b/mcp-worker/src/index.mjs new file mode 100644 index 0000000..2badb12 --- /dev/null +++ b/mcp-worker/src/index.mjs @@ -0,0 +1,227 @@ +import { Server } from '@modelcontextprotocol/sdk/server/index.js' +import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js' +import { + CallToolRequestSchema, + ListToolsRequestSchema +} from '@modelcontextprotocol/sdk/types.js' + +import { notesData } from '../generated/notes-data.mjs' + +const MAX_SEARCH_RESULTS = 10 +const SNIPPET_RADIUS = 220 +const transports = new Map() + +function normalizeNoteId(noteId = '') { + const normalized = String(noteId).trim().replaceAll('\\', '/').replace(/^\/+|\/+$/g, '') + if (!normalized || normalized.includes('..')) { + return null + } + + return normalized +} + +function resolveNote(noteId) { + const normalized = normalizeNoteId(noteId) + if (!normalized) { + return null + } + + return notesData.find((note) => + note.slug === normalized || + note.relativePath === normalized || + note.relativePath.replace(/\.(md|mdx)$/i, '') === normalized + ) ?? null +} + +function buildSnippet(content, index, query) { + const start = Math.max(0, index - SNIPPET_RADIUS) + const end = Math.min(content.length, index + query.length + SNIPPET_RADIUS) + return content + .slice(start, end) + .replace(/\s+/g, ' ') + .trim() +} + +function textResponse(text) { + return { + content: [{ type: 'text', text }] + } +} + +function createServer() { + const server = new Server( + { + name: 'notenextra-notes-worker', + version: '1.0.0' + }, + { + capabilities: { + tools: {} + } + } + ) + + server.setRequestHandler(ListToolsRequestSchema, async () => ({ + tools: [ + { + name: 'list_notes', + description: 'List available notes from the generated notes knowledge base.', + inputSchema: { + type: 'object', + properties: { + course: { + type: 'string', + description: 'Optional course or directory prefix, for example CSE442T or Math4201.' + } + } + } + }, + { + name: 'read_note', + description: 'Read a note by slug or relative path, for example CSE442T/CSE442T_L1.', + inputSchema: { + type: 'object', + properties: { + noteId: { + type: 'string', + description: 'Note slug or relative path inside content/.' + } + }, + required: ['noteId'] + } + }, + { + name: 'search_notes', + description: 'Search the notes knowledge base using a simple text match over all markdown content.', + inputSchema: { + type: 'object', + properties: { + query: { + type: 'string', + description: 'Search term or phrase.' + }, + limit: { + type: 'number', + description: `Maximum results to return, capped at ${MAX_SEARCH_RESULTS}.` + } + }, + required: ['query'] + } + } + ] + })) + + server.setRequestHandler(CallToolRequestSchema, async (request) => { + const { name, arguments: args = {} } = request.params + + if (name === 'list_notes') { + const course = typeof args.course === 'string' + ? args.course.trim().toLowerCase() + : '' + const filtered = course + ? notesData.filter((note) => note.relativePath.toLowerCase().startsWith(`${course}/`)) + : notesData + + return textResponse(filtered.map((note) => note.slug).join('\n') || 'No notes found.') + } + + if (name === 'read_note') { + const note = resolveNote(args.noteId) + if (!note) { + return textResponse('Note not found.') + } + + return textResponse(`# ${note.slug}\n\n${note.content}`) + } + + if (name === 'search_notes') { + const query = typeof args.query === 'string' ? args.query.trim() : '' + if (!query) { + return textResponse('Query must be a non-empty string.') + } + + const limit = Math.max(1, Math.min(Number(args.limit) || 5, MAX_SEARCH_RESULTS)) + const queryLower = query.toLowerCase() + const matches = [] + + for (const note of notesData) { + const haystack = `${note.slug}\n${note.content}` + const index = haystack.toLowerCase().indexOf(queryLower) + if (index === -1) { + continue + } + + matches.push({ + note, + index, + snippet: buildSnippet(haystack, index, query) + }) + } + + matches.sort((a, b) => a.index - b.index || a.note.slug.localeCompare(b.note.slug)) + + return textResponse( + matches + .slice(0, limit) + .map(({ note, snippet }) => `- ${note.slug}\n${snippet}`) + .join('\n\n') || 'No matches found.' + ) + } + + throw new Error(`Unknown tool: ${name}`) + }) + + return server +} + +async function handleMcpRequest(request) { + const sessionId = request.headers.get('mcp-session-id') + let transport = sessionId ? transports.get(sessionId) : undefined + + if (!transport && request.method === 'POST') { + transport = new WebStandardStreamableHTTPServerTransport({ + sessionIdGenerator: () => crypto.randomUUID(), + enableJsonResponse: true, + onsessioninitialized: (newSessionId) => { + transports.set(newSessionId, transport) + }, + onsessionclosed: (closedSessionId) => { + transports.delete(closedSessionId) + } + }) + + transport.onclose = () => { + if (transport.sessionId) { + transports.delete(transport.sessionId) + } + } + + const server = createServer() + await server.connect(transport) + } + + if (!transport) { + return new Response('Invalid or missing MCP session.', { status: 400 }) + } + + return transport.handleRequest(request) +} + +export default { + async fetch(request) { + const url = new URL(request.url) + + if (url.pathname === '/health') { + return Response.json({ + status: 'ok', + notes: notesData.length + }) + } + + if (url.pathname === '/mcp') { + return handleMcpRequest(request) + } + + return new Response('Not found.', { status: 404 }) + } +} diff --git a/mcp-worker/wrangler.toml b/mcp-worker/wrangler.toml new file mode 100644 index 0000000..97ad765 --- /dev/null +++ b/mcp-worker/wrangler.toml @@ -0,0 +1,4 @@ +name = "notenextra-mcp" +main = "src/index.mjs" +compatibility_date = "2025-02-13" +compatibility_flags = ["nodejs_compat"] diff --git a/package.json b/package.json index cd6f53b..a409f6d 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,11 @@ "postbuild": "next-sitemap && pagefind --site .next/server/app --output-path out/_pagefind", "start": "next start", "mcp:notes": "node ./mcp-server.mjs", - "test:mcp": "node ./test/test-mcp-server.mjs" + "mcp:worker:build-data": "node ./scripts/generate-mcp-worker-data.mjs", + "mcp:worker:deploy": "npm run mcp:worker:build-data && npx wrangler deploy --config mcp-worker/wrangler.toml", + "mcp:worker:deploy:dry-run": "npm run mcp:worker:build-data && npx wrangler deploy --dry-run --config mcp-worker/wrangler.toml", + "test:mcp": "node ./test/test-mcp-server.mjs", + "test:mcp:worker": "node ./test/test-mcp-worker.mjs" }, "dependencies": { "@modelcontextprotocol/sdk": "^1.18.1", @@ -34,4 +38,4 @@ "@types/node": "24.10.0", "@types/react": "19.2.2" } -} +} diff --git a/scripts/generate-mcp-worker-data.mjs b/scripts/generate-mcp-worker-data.mjs new file mode 100644 index 0000000..6d72142 --- /dev/null +++ b/scripts/generate-mcp-worker-data.mjs @@ -0,0 +1,47 @@ +import fs from 'node:fs/promises' +import path from 'node:path' +import process from 'node:process' + +const CONTENT_ROOT = path.join(process.cwd(), 'content') +const OUTPUT_DIR = path.join(process.cwd(), 'mcp-worker', 'generated') +const OUTPUT_FILE = path.join(OUTPUT_DIR, 'notes-data.mjs') +const NOTE_EXTENSIONS = new Set(['.md', '.mdx']) + +async function walkNotes(dir = CONTENT_ROOT) { + const entries = await fs.readdir(dir, { withFileTypes: true }) + const notes = await Promise.all(entries.map(async (entry) => { + const fullPath = path.join(dir, entry.name) + + if (entry.isDirectory()) { + return walkNotes(fullPath) + } + + if (!entry.isFile() || !NOTE_EXTENSIONS.has(path.extname(entry.name))) { + return [] + } + + const relativePath = path.relative(CONTENT_ROOT, fullPath).replaceAll('\\', '/') + const slug = relativePath.replace(/\.(md|mdx)$/i, '') + const content = await fs.readFile(fullPath, 'utf8') + + return [{ + slug, + relativePath, + title: path.basename(slug), + content + }] + })) + + return notes.flat().sort((a, b) => a.relativePath.localeCompare(b.relativePath)) +} + +const notes = await walkNotes() + +await fs.mkdir(OUTPUT_DIR, { recursive: true }) +await fs.writeFile( + OUTPUT_FILE, + `export const notesData = ${JSON.stringify(notes, null, 2)};\n`, + 'utf8' +) + +process.stdout.write(`Generated ${notes.length} notes for MCP worker.\n`) diff --git a/test/test-mcp-worker.mjs b/test/test-mcp-worker.mjs new file mode 100644 index 0000000..260cc08 --- /dev/null +++ b/test/test-mcp-worker.mjs @@ -0,0 +1,121 @@ +import assert from 'node:assert/strict' +import process from 'node:process' + +import worker from '../mcp-worker/src/index.mjs' +import { LATEST_PROTOCOL_VERSION } from '@modelcontextprotocol/sdk/types.js' + +function makeJsonRequest(url, body, headers = {}) { + return new Request(url, { + method: 'POST', + headers: { + accept: 'application/json, text/event-stream', + 'content-type': 'application/json', + ...headers + }, + body: JSON.stringify(body) + }) +} + +const baseUrl = 'https://example.com' + +const healthResponse = await worker.fetch(new Request(`${baseUrl}/health`)) +assert.equal(healthResponse.status, 200) +const healthJson = await healthResponse.json() +assert.equal(healthJson.status, 'ok') +assert.ok(healthJson.notes > 0) + +const initializeResponse = await worker.fetch(makeJsonRequest(`${baseUrl}/mcp`, { + jsonrpc: '2.0', + id: 1, + method: 'initialize', + params: { + protocolVersion: LATEST_PROTOCOL_VERSION, + capabilities: {}, + clientInfo: { + name: 'notenextra-worker-test', + version: '1.0.0' + } + } +})) + +assert.equal(initializeResponse.status, 200) +const sessionId = initializeResponse.headers.get('mcp-session-id') +assert.ok(sessionId, 'initialize should return an MCP session ID') +const protocolVersion = initializeResponse.headers.get('mcp-protocol-version') || LATEST_PROTOCOL_VERSION +const initializeJson = await initializeResponse.json() +assert.ok(initializeJson.result, 'initialize should return a result payload') + +const toolListResponse = await worker.fetch(makeJsonRequest(`${baseUrl}/mcp`, { + jsonrpc: '2.0', + id: 2, + method: 'tools/list', + params: {} +}, { + 'mcp-protocol-version': protocolVersion, + 'mcp-session-id': sessionId +})) + +assert.equal(toolListResponse.status, 200) +const toolListJson = await toolListResponse.json() +const toolNames = toolListJson.result.tools.map((tool) => tool.name).sort() +assert.deepEqual(toolNames, ['list_notes', 'read_note', 'search_notes']) + +const listNotesResponse = await worker.fetch(makeJsonRequest(`${baseUrl}/mcp`, { + jsonrpc: '2.0', + id: 3, + method: 'tools/call', + params: { + name: 'list_notes', + arguments: { + course: 'CSE442T' + } + } +}, { + 'mcp-protocol-version': protocolVersion, + 'mcp-session-id': sessionId +})) + +assert.equal(listNotesResponse.status, 200) +const listNotesJson = await listNotesResponse.json() +assert.match(listNotesJson.result.content[0].text, /CSE442T\/CSE442T_L1/) + +const readNoteResponse = await worker.fetch(makeJsonRequest(`${baseUrl}/mcp`, { + jsonrpc: '2.0', + id: 4, + method: 'tools/call', + params: { + name: 'read_note', + arguments: { + noteId: 'about' + } + } +}, { + 'mcp-protocol-version': protocolVersion, + 'mcp-session-id': sessionId +})) + +assert.equal(readNoteResponse.status, 200) +const readNoteJson = await readNoteResponse.json() +assert.match(readNoteJson.result.content[0].text, /This is a static server for me to share my notes/i) + +const searchResponse = await worker.fetch(makeJsonRequest(`${baseUrl}/mcp`, { + jsonrpc: '2.0', + id: 5, + method: 'tools/call', + params: { + name: 'search_notes', + arguments: { + query: "Kerckhoffs' principle", + limit: 3 + } + } +}, { + 'mcp-protocol-version': protocolVersion, + 'mcp-session-id': sessionId +})) + +assert.equal(searchResponse.status, 200) +const searchJson = await searchResponse.json() +assert.match(searchJson.result.content[0].text, /CSE442T\/CSE442T_L1/) + +process.stdout.write('MCP worker test passed.\n')