jackwener · Greatkai · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 6, 2026
diff --git a/PULL_REQUEST.md b/PULL_REQUEST.md
@@ -0,0 +1,167 @@
+# Pull Request: Add PubMed Adapter
+
+## Summary
+
+This PR adds comprehensive PubMed academic literature search capabilities to OpenCLI, enabling researchers and clinicians to access the PubMed database directly from the command line.
+
+## Features
+
+### New Commands (6 total)
+
+| Command | Description |
+|---------|-------------|
+| `pubmed search` | Advanced article search with multiple filters |
+| `pubmed article` | Detailed article metadata retrieval |
+| `pubmed author` | Search by author name and affiliation |
+| `pubmed citations` | Get citation relationships |
+| `pubmed related` | Find semantically similar articles |
+| `pubmed config` | Configure API key for higher rate limits |
+
+### Key Capabilities
+
+- **Advanced Search Filters**: date range, author, journal, article type, abstract availability, free full text, human studies, language
+- **Complete Author Information**: first author, corresponding author, all authors, affiliations
+- **Publication Metadata**: title, abstract, journal, volume, issue, pages, publication date
+- **Academic Identifiers**: PMID, DOI, PMC ID
+- **Classification**: MeSH terms, keywords, article type
+- **Citation Analysis**: find articles that cite a given article or articles cited by it
+- **Related Articles**: discover semantically similar articles using PubMed's algorithm
+
+## API Integration
+
+Uses NCBI E-utilities APIs:
+- **ESearch**: Search for articles and retrieve PMIDs
+- **ESummary**: Get summary information for PMIDs
+- **EFetch**: Retrieve full article details including abstracts
+- **ELink**: Get citation relationships and related articles
+
+## Technical Implementation
+
+- **Language**: TypeScript with full type safety
+- **Strategy**: `Strategy.PUBLIC` (PubMed E-utilities is a public API)
+- **Rate Limiting**: Automatic delays to respect NCBI limits (3 req/s public, 10 req/s with API key)
+- **Error Handling**: Consistent use of `CliError` for all error cases
+- **Code Quality**: Follows existing OpenCLI patterns (similar to arXiv adapter)
+
+## Files Added
+
+```
+clis/pubmed/
+├── utils.ts      # Shared utilities for API calls and data parsing
+├── search.ts     # Article search with advanced filters
+├── article.ts    # Detailed article information
+├── author.ts     # Author-based search
+├── citations.ts  # Citation relationships
+├── related.ts    # Related articles discovery
+└── config.ts     # API key configuration for higher rate limits
+```
+
+## Usage Examples
+
+```bash
+# Search with filters
+opencli pubmed search "machine learning cancer" --year-from 2023 --has-abstract
+
+# Get article details
+opencli pubmed article 37780221 --format json
+
+# Search by author
+opencli pubmed author "Smith J" --affiliation "Stanford" --position first
+
+# Citation analysis
+opencli pubmed citations 37780221 --direction citedby --limit 50
+
+# Find related articles
+opencli pubmed related 37780221 --score
+
+# Configure API key for higher rate limits (10 req/s vs 3 req/s)
+opencli pubmed config set --key api-key --value YOUR_NCBI_API_KEY
+
+# View current configuration
+opencli pubmed config get
+
+# Remove API key
+opencli pubmed config remove --key api-key
+```
+
+## Testing
+
+- [x] Built successfully with `npm run build`
+- [x] Manifest compiled: 260 entries (121 YAML, 139 TS)
+- [x] All TypeScript files compile without errors
+- [ ] Runtime testing (requires NCBI API access)
+
+## Documentation
+
+The contribution package includes a comprehensive README covering:
+- Installation instructions
+- Detailed usage examples
+- API reference
+- Rate limit information
+- File structure explanation
+
+## Rate Limits & API Key Configuration
+
+The adapter implements automatic rate limiting:
+- **Without API key**: 350ms delay between requests (≈3 req/s)
+- **With API key**: 100ms delay between requests (≈10 req/s)
+
+### Getting an API Key
+
+1. Create an NCBI account at https://www.ncbi.nlm.nih.gov/account/
+2. Go to https://www.ncbi.nlm.nih.gov/account/settings/
+3. Generate an API key
+
+### Configuring Your API Key
+
+```bash
+# Set your API key
+opencli pubmed config set --key api-key --value YOUR_API_KEY
+
+# Set your email (recommended for identification)
+opencli pubmed config set --key email --value your@email.com
+
+# View current configuration
+opencli pubmed config get
+
+# Remove configuration
+opencli pubmed config remove --key api-key
+```
+
+Configuration is stored in `~/.opencli/pubmed-config.json`.
+
+### Environment Variables
+
+Alternatively, you can use environment variables:
+- `NCBI_API_KEY` - Your NCBI API key
+- `NCBI_EMAIL` - Your email address
+
+## Backwards Compatibility
+
+This is a new adapter with no impact on existing functionality.
+
+## Checklist
+
+- [x] Code follows OpenCLI conventions
+- [x] TypeScript types are properly defined
+- [x] Error handling uses CliError
+- [x] Rate limiting is implemented
+- [x] Build passes successfully
+- [x] No breaking changes to existing code
+
+## Author
+
+**GreatKai**, working with WorkBuddy — building tools to help researchers access scientific literature more efficiently.
+
+## References
+
+- [NCBI E-utilities Documentation](https://www.ncbi.nlm.nih.gov/books/NBK25501/)
+- [PubMed](https://pubmed.ncbi.nlm.nih.gov/)
+
+---
+
+**Related Issue**: N/A (new feature)
+
+**Breaking Changes**: None
+
+**Dependencies**: None (uses native fetch API)
diff --git a/clis/pubmed/article.ts b/clis/pubmed/article.ts
@@ -0,0 +1,225 @@
+/**
+ * PubMed Article Details Adapter
+ *
+ * Get detailed information about a specific PubMed article by PMID.
+ * Uses EFetch API (XML) for full article details including abstract,
+ * MeSH terms, keywords, and author affiliations.
+ *
+ * API Documentation:
+ * - EFetch: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
+ */
+
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { CliError } from '@jackwener/opencli/errors';
+import {
+  eutilsFetchText,
+  buildPubMedUrl,
+  truncateText,
+  prioritizeArticleType,
+} from './utils.js';
+
+/**
+ * Parse an EFetch XML response into a structured article record.
+ *
+ * Extraction is regex-based (no XML parser dependency) and assumes `xml`
+ * describes a single PubMed article. Text values have nested markup
+ * stripped and XML entities decoded.
+ *
+ * @param xml  Raw EFetch XML for one article.
+ * @param pmid PMID that was requested; echoed into the result and used to
+ *             build the article URL.
+ * @returns Nested record with `authors`, `journal`, `publication`, `ids`,
+ *          `classification` and `url` sections.
+ */
+function parseEFetchXml(xml: string, pmid: string) {
+  // Build a pattern for one element by its EXACT tag name. The name must be
+  // followed by whitespace or '>' so that 'Author' does not also match
+  // '<AuthorList>' and 'Keyword' does not match '<KeywordList>'. The
+  // lookbehind rejects self-closing tags such as '<AbstractText Label="X"/>'.
+  const tagPattern = (tag: string, flags: string): RegExp =>
+    new RegExp(`<${tag}(?:\\s[^>]*)?(?<!/)>([\\s\\S]*?)</${tag}>`, flags);
+
+  // Turn a numeric character reference into its character; leave the
+  // original text untouched when the code point is out of range.
+  const fromCodePoint = (raw: string, value: number): string =>
+    Number.isInteger(value) && value >= 0 && value <= 0x10ffff
+      ? String.fromCodePoint(value)
+      : raw;
+
+  // Decode numeric references and the five predefined XML entities.
+  // '&amp;' is handled last so '&amp;lt;' becomes '&lt;' rather than '<'.
+  const decodeEntities = (text: string): string =>
+    text
+      .replace(/&#x([0-9a-f]+);/gi, (raw, hex: string) => fromCodePoint(raw, parseInt(hex, 16)))
+      .replace(/&#(\d+);/g, (raw, dec: string) => fromCodePoint(raw, parseInt(dec, 10)))
+      .replace(/&lt;/g, '<')
+      .replace(/&gt;/g, '>')
+      .replace(/&quot;/g, '"')
+      .replace(/&apos;/g, "'")
+      .replace(/&amp;/g, '&');
+
+  // Strip nested markup BEFORE decoding so a decoded '<' is never mistaken for a tag.
+  const cleanText = (raw: string): string =>
+    decodeEntities(raw.replace(/<[^>]+>/g, '')).trim();
+
+  // Inner XML of the first <tag> element, or '' when absent.
+  const getRaw = (src: string, tag: string): string => {
+    const m = src.match(tagPattern(tag, 'i'));
+    return m ? m[1] ?? '' : '';
+  };
+
+  // Inner XML of every <tag> element, in document order.
+  const getAllRaw = (src: string, tag: string): string[] =>
+    Array.from(src.matchAll(tagPattern(tag, 'gi')), m => m[1] ?? '');
+
+  // Plain-text content of the first <tag> element, or '' when absent.
+  const getTag = (src: string, tag: string): string => cleanText(getRaw(src, tag));
+
+  // Plain-text content of every <tag> element.
+  const getAllTags = (src: string, tag: string): string[] =>
+    getAllRaw(src, tag).map(cleanText);
+
+  // Abstract - may have multiple AbstractText sections (structured abstract)
+  const abstract = getAllTags(xml, 'AbstractText').join(' ').replace(/\s+/g, ' ').trim();
+
+  // Title
+  const title = getTag(xml, 'ArticleTitle');
+
+  // Journal - scope lookups to the <Journal> element when it exists so that
+  // generic tag names (Title, Volume, Issue) cannot match elsewhere.
+  const journalXml = getRaw(xml, 'Journal') || xml;
+  const journalTitle = getTag(journalXml, 'Title');
+  const isoAbbreviation = getTag(journalXml, 'ISOAbbreviation');
+  const volume = getTag(journalXml, 'Volume');
+  const issue = getTag(journalXml, 'Issue');
+  const pagination = getTag(xml, 'MedlinePgn');
+
+  // Publication date - read from <PubDate> (falling back to <ArticleDate>).
+  // Searching the whole document would hit <DateCompleted>/<DateRevised>
+  // first, which are record-maintenance dates rather than the publication date.
+  const pubDateXml = getRaw(journalXml, 'PubDate') || getRaw(xml, 'ArticleDate');
+  const medlineDate = getTag(pubDateXml, 'MedlineDate');
+  const year = getTag(pubDateXml, 'Year') || (medlineDate.match(/\d{4}/)?.[0] ?? '');
+  const month = getTag(pubDateXml, 'Month');
+  const day = getTag(pubDateXml, 'Day');
+  // <MedlineDate> is free text (e.g. "2020 Jan-Feb"); keep it verbatim when present.
+  const fullDate = medlineDate || [year, month, day].filter(Boolean).join(' ');
+
+  // Authors and affiliations
+  const authors = getAllRaw(xml, 'Author').map(block => {
+    const lastName = getTag(block, 'LastName');
+    const foreName = getTag(block, 'ForeName') || getTag(block, 'Initials');
+    const name = getTag(block, 'CollectiveName') || `${lastName} ${foreName}`.trim();
+    // An author may list several affiliations; keep all of them.
+    const authorAffiliations = getAllTags(block, 'Affiliation').filter(Boolean);
+    return { name, affiliations: authorAffiliations };
+  });
+
+  const allAuthors = authors.map(a => a.name);
+  const firstAuthor = allAuthors[0] || '';
+  // Heuristic: no corresponding-author field is read from the XML, so the
+  // last listed author is reported.
+  const correspondingAuthor = allAuthors[allAuthors.length - 1] || '';
+
+  // Unique affiliations across all authors, in first-seen order
+  const affiliations = [...new Set(authors.flatMap(a => a.affiliations))];
+
+  // MeSH terms (first 10)
+  const meshTerms = getAllRaw(xml, 'MeshHeading')
+    .map(block => getTag(block, 'DescriptorName'))
+    .filter(Boolean)
+    .slice(0, 10);
+
+  // Keywords (first 10)
+  const keywords = getAllTags(xml, 'Keyword').filter(Boolean).slice(0, 10);
+
+  // Article type - PubMed returns multiple types, prioritize more specific ones
+  const pubTypes = getAllTags(xml, 'PublicationType').filter(Boolean);
+  const articleType = prioritizeArticleType(pubTypes);
+
+  // Language
+  const language = getTag(xml, 'Language');
+
+  // IDs - only the first <ArticleIdList> is consulted. Later lists belong to
+  // cited references, and their DOIs must not be reported as this article's.
+  const articleIdsXml = getRaw(xml, 'ArticleIdList');
+  const getArticleId = (idType: string): string => {
+    const m = articleIdsXml.match(
+      new RegExp(`<ArticleId\\s[^>]*IdType="${idType}"[^>]*>([^<]+)</ArticleId>`, 'i')
+    );
+    return m ? decodeEntities(m[1] ?? '').trim() : '';
+  };
+  const doi = getArticleId('doi');
+  const pmcId = getArticleId('pmc');
+
+  return {
+    pmid,
+    title,
+    abstract,
+    authors: {
+      list: allAuthors,
+      // Display string: at most 10 names, with "et al." when truncated.
+      all: allAuthors.slice(0, 10).join(', ') + (allAuthors.length > 10 ? ', et al.' : ''),
+      first: firstAuthor,
+      corresponding: correspondingAuthor,
+      count: allAuthors.length,
+      affiliations,
+    },
+    journal: {
+      title: journalTitle,
+      isoAbbreviation,
+      volume,
+      issue,
+      pagination,
+    },
+    publication: {
+      year,
+      fullDate,
+    },
+    ids: {
+      pmid,
+      doi,
+      pmc: pmcId,
+    },
+    classification: {
+      articleType,
+      pubTypes,
+      language,
+      meshTerms,
+      keywords,
+    },
+    url: buildPubMedUrl(pmid),
+  };
+}
+
+// Registers the `pubmed article` command: validates the PMID, fetches the
+// article XML through EFetch, and returns field/value rows (or a single row
+// holding the full JSON record when `output` is "json").
+cli({
+  site: 'pubmed',
+  name: 'article',
+  description: 'Get detailed information about a PubMed article by PMID (full abstract, MeSH terms, affiliations)',
+  strategy: Strategy.PUBLIC,
+  browser: false,
+  args: [
+    {
+      name: 'pmid',
+      type: 'string',
+      required: true,
+      positional: true,
+      help: 'PubMed ID (e.g., "37780221")',
+    },
+    {
+      name: 'output',
+      type: 'string',
+      default: 'table',
+      help: 'Output format: table (summary) or json (full details)',
+    },
+  ],
+  columns: ['field', 'value'],
+  func: async (_page, args) => {
+    const pmid = args.pmid.trim();
+
+    // A PMID consists of digits only; reject anything else before hitting the API.
+    const isNumericPmid = /^\d+$/.test(pmid);
+    if (!isNumericPmid) {
+      throw new CliError(
+        'INVALID_ARGUMENT',
+        `Invalid PMID format: ${pmid}`,
+        'PMID should be a numeric string (e.g., "37780221")'
+      );
+    }
+
+    // EFetch in XML mode carries the abstract, MeSH headings and affiliations.
+    const efetchParams = { id: pmid, rettype: 'abstract', retmode: 'xml' };
+    const response = await eutilsFetchText('efetch', efetchParams);
+
+    // Usable only when non-empty, free of an <ERROR> marker, and holding an article element.
+    if (!(response && !response.includes('<ERROR>') && response.includes('<PubmedArticle>'))) {
+      throw new CliError(
+        'NOT_FOUND',
+        `Article with PMID ${pmid} not found`,
+        'Check the PMID and try again'
+      );
+    }
+
+    const details = parseEFetchXml(response, pmid);
+
+    // JSON mode: hand back the whole record serialized into a single row.
+    if (args.output === 'json') {
+      const serialized = JSON.stringify(details, null, 2);
+      return [{ field: 'data', value: serialized }];
+    }
+
+    // Table mode: one row per summary field.
+    const { authors, journal, publication, ids, classification } = details;
+    const issueSuffix = journal.issue ? `(${journal.issue})` : '';
+    const primaryAffiliation = authors.affiliations[0] || 'N/A';
+
+    const rows: Array<{ field: string; value: string }> = [
+      { field: 'PMID', value: details.pmid },
+      { field: 'Title', value: details.title },
+      { field: 'First Author', value: authors.first },
+      { field: 'Corresponding Author', value: authors.corresponding },
+      { field: 'All Authors', value: truncateText(authors.all, 120) },
+      { field: 'Affiliations', value: truncateText(primaryAffiliation, 120) },
+      { field: 'Journal', value: journal.title || journal.isoAbbreviation },
+      { field: 'Year', value: publication.year },
+      { field: 'Volume/Issue', value: `${journal.volume}${issueSuffix}` },
+      { field: 'Pages', value: journal.pagination },
+      { field: 'DOI', value: ids.doi || 'N/A' },
+      { field: 'PMC ID', value: ids.pmc || 'N/A' },
+      { field: 'Article Type', value: classification.articleType },
+      { field: 'Language', value: classification.language },
+      { field: 'MeSH Terms', value: classification.meshTerms.join(', ') || 'N/A' },
+      { field: 'Keywords', value: classification.keywords.join(', ') || 'N/A' },
+      { field: 'Abstract', value: truncateText(details.abstract, 400) || 'N/A' },
+      { field: 'URL', value: details.url },
+    ];
+
+    return rows;
+  },
+});