import 'dotenv/config'
import express from 'express'
import cors from 'cors'
import Parser from 'rss-parser'
import OpenAI from 'openai'
import Database from 'better-sqlite3'

const app = express()
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
const parser = new Parser()
// Port is overridable via the environment; default preserved from before.
const PORT = process.env.PORT ?? 5555

// ---------------------------------------------------------------------------
// SQLite-backed cache for the AI-grouped news (single-row table, id = 1)
// ---------------------------------------------------------------------------
const db = new Database('cache.db')
db.exec(`
  CREATE TABLE IF NOT EXISTS grouped_news_cache (
    id INTEGER PRIMARY KEY,
    data TEXT NOT NULL,
    created_at INTEGER NOT NULL
  )
`)

const CACHE_DURATION_MS = 3 * 60 * 60 * 1000 // 3 hours

/**
 * Read the cached grouped-news payload.
 * @returns {{ data: any, age: number } | null} the parsed payload plus its
 *   age in milliseconds, or null when there is no cache row or it expired.
 */
function getCachedGroupedNews() {
  const row = db.prepare('SELECT data, created_at FROM grouped_news_cache WHERE id = 1').get()
  if (!row) return null
  const age = Date.now() - row.created_at
  if (age > CACHE_DURATION_MS) return null
  return { data: JSON.parse(row.data), age }
}

/**
 * Store grouped-news data in the single-row cache, replacing any prior entry.
 * @param {any} data - JSON-serializable grouped-news payload.
 */
function setCachedGroupedNews(data) {
  const stmt = db.prepare('INSERT OR REPLACE INTO grouped_news_cache (id, data, created_at) VALUES (1, ?, ?)')
  stmt.run(JSON.stringify(data), Date.now())
}

/** Remove every cached grouped-news row. */
function clearCache() {
  db.prepare('DELETE FROM grouped_news_cache').run()
}

// Source names to filter out from AI summaries
const SOURCE_NAMES = [
  'ABC News', 'ABC', 'NPR', 'CNN', 'Reuters', 'NBC News', 'NBC',
  'CBS News', 'CBS', 'NY Times', 'New York Times', 'NYT',
  'AP News', 'Associated Press', 'AP', 'BBC', 'Guardian', 'The Guardian',
]

/**
 * Replace any whole-word mention of a known outlet name with "[news]".
 * @param {string} text - text that may mention outlet names.
 * @returns {string} sanitized text (falsy input is returned unchanged).
 */
function replaceSourceNames(text) {
  if (!text) return text
  let result = text
  // Sort by length descending to replace longer names first (e.g., "New York Times" before "NY")
  const sortedNames = [...SOURCE_NAMES].sort((a, b) => b.length - a.length)
  for (const name of sortedNames) {
    // Use word boundaries to only match whole words, not parts of other words
    const regex = new RegExp(`\\b${name}\\b`, 'gi')
    result = result.replace(regex, '[news]')
  }
  return result
}

/**
 * Sanitize every group's title and summary, logging when a replacement happened.
 * @param {Array<{title: string, summary: string}>} groups
 * @returns {Array} new group objects with sanitized title/summary.
 */
function sanitizeGroups(groups) {
  return groups.map((group) => {
    const newTitle = replaceSourceNames(group.title)
    const newSummary = replaceSourceNames(group.summary)
    if (newTitle !== group.title || newSummary !== group.summary) {
      console.log(`Replaced source names in group: "${group.title}"`)
    }
    return {
      ...group,
      title: newTitle,
      summary: newSummary,
    }
  })
}

app.use(cors())

const RSS_FEEDS = {
  abc: 'https://abcnews.go.com/abcnews/topstories',
  npr: 'https://feeds.npr.org/1001/rss.xml',
  cnn: 'http://rss.cnn.com/rss/cnn_topstories.rss',
  nbc: 'https://feeds.nbcnews.com/nbcnews/public/news',
  cbs: 'https://www.cbsnews.com/latest/rss/main',
  nytimes: 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml',
}

// Raw per-source feed listing. All feeds are fetched in parallel; failures
// are reported per source instead of failing the whole request.
app.get('/api/news', async (req, res) => {
  try {
    const sources = Object.keys(RSS_FEEDS)
    const results = await Promise.allSettled(
      Object.entries(RSS_FEEDS).map(async ([source, url]) => {
        const feed = await parser.parseURL(url)
        return {
          source,
          title: feed.title,
          items: feed.items.map((item) => ({
            title: item.title,
            link: item.link,
            pubDate: item.pubDate,
            content: item.contentSnippet || item.content || '',
            source,
            image: extractImage(item),
          })),
        }
      })
    )
    const feeds = results
      .filter((r) => r.status === 'fulfilled')
      .map((r) => r.value)
    // BUG FIX: the old code used the index within the *filtered* rejected
    // array to look up Object.keys(RSS_FEEDS), attributing errors to the
    // wrong source whenever an earlier feed succeeded. Pair each result
    // with its source before filtering.
    const errors = results
      .map((result, i) => ({ result, source: sources[i] }))
      .filter(({ result }) => result.status === 'rejected')
      .map(({ result, source }) => ({
        source,
        error: result.reason?.message ?? String(result.reason),
      }))
    res.json({ feeds, errors })
  } catch (error) {
    res.status(500).json({ error: error.message })
  }
})

// Endpoint to clear the cache
app.post('/api/clear-cache', (req, res) => {
  clearCache()
  console.log('Cache cleared by user')
  res.json({ success: true, message: 'Cache cleared' })
})

// AI-grouped news: serves from the SQLite cache when fresh, otherwise
// fetches feeds, asks OpenAI to cluster related stories, sanitizes the
// result, and caches it for CACHE_DURATION_MS.
app.get('/api/grouped-news', async (req, res) => {
  try {
    // Check if user wants to force refresh
    const forceRefresh = req.query.refresh === 'true'
    if (forceRefresh) {
      clearCache()
      console.log('Force refresh requested - cache cleared')
    }

    // Check cache first
    const cached = getCachedGroupedNews()
    if (cached) {
      const remainingMs = CACHE_DURATION_MS - cached.age
      const remainingMins = Math.round(remainingMs / 60000)
      console.log(`Serving cached grouped news (${remainingMins} minutes until refresh)`)
      return res.json({ groups: cached.data, cached: true, cacheExpiresIn: remainingMins })
    }

    console.log('Cache miss - fetching RSS feeds...')

    // Fetch all news first
    const results = await Promise.allSettled(
      Object.entries(RSS_FEEDS).map(async ([source, url]) => {
        console.log(` Fetching ${source}...`)
        try {
          const feed = await parser.parseURL(url)
          console.log(` ✓ ${source}: ${feed.items.length} articles`)
          return {
            source,
            items: feed.items.map((item) => ({
              title: item.title,
              link: item.link,
              pubDate: item.pubDate,
              content: item.contentSnippet || item.content || '',
              source,
              image: extractImage(item),
            })),
          }
        } catch (err) {
          console.log(` ✗ ${source}: ${err.message}`)
          throw err
        }
      })
    )

    const feedResults = results
      .filter((r) => r.status === 'fulfilled')
      .map((r) => r.value)

    // Ensure at least 5 articles from each source, then fill rest by date
    const MIN_PER_SOURCE = 5
    const TOTAL_LIMIT = 50
    const selectedArticles = []
    const usedIds = new Set()

    // First pass: take up to MIN_PER_SOURCE from each source (sorted by date)
    for (const feed of feedResults) {
      const sorted = [...feed.items].sort((a, b) => new Date(b.pubDate) - new Date(a.pubDate))
      const toTake = sorted.slice(0, MIN_PER_SOURCE)
      for (const article of toTake) {
        const id = article.link
        if (!usedIds.has(id)) {
          usedIds.add(id)
          selectedArticles.push(article)
        }
      }
    }

    // Second pass: fill remaining slots with newest articles across all sources
    const allRemaining = feedResults
      .flatMap((f) => f.items)
      .filter((a) => !usedIds.has(a.link))
      .sort((a, b) => new Date(b.pubDate) - new Date(a.pubDate))
    const remaining = TOTAL_LIMIT - selectedArticles.length
    selectedArticles.push(...allRemaining.slice(0, remaining))

    // Final sort by date
    const allArticles = selectedArticles.sort((a, b) => new Date(b.pubDate) - new Date(a.pubDate))
    console.log(`Selected ${allArticles.length} articles (min ${MIN_PER_SOURCE}/source, then by date)`)

    if (allArticles.length === 0) {
      return res.json({ groups: [] })
    }

    // Send to OpenAI for grouping. Article ids are positions in allArticles
    // so the model's articleIds map straight back to the selection.
    const articlesForAI = allArticles.map((a, i) => ({
      id: i,
      title: a.title,
      content: a.content?.slice(0, 200) || '',
      source: a.source,
    }))

    console.log(`Sending ${articlesForAI.length} articles to OpenAI gpt-5-mini...`)

    const completion = await openai.chat.completions.create({
      model: 'gpt-5-mini',
      // Constrain the model to emit a JSON object so JSON.parse below cannot
      // fail on prose wrapped around the payload.
      response_format: { type: 'json_object' },
      messages: [
        {
          role: 'system',
          content: `You are a news analyst. Group articles that cover THE SAME SPECIFIC NEWS STORY together.

IMPORTANT RULES:
- Each group must contain articles about ONE specific news event or story
- Do NOT combine unrelated topics into a single group
- Do NOT create broad category groups (e.g., "Various Political News")
- Articles about different events should be in SEPARATE groups, even if they share a category
- It's better to have more specific groups than fewer broad ones
- If an article doesn't match any group, put it in its own single-article group

WRITING RULES:
- Write ORIGINAL titles - do NOT copy or closely paraphrase headlines from the source articles
- Write ORIGINAL summaries in your own words - do NOT copy sentences from the articles
- Synthesize information from multiple sources into a fresh, unique narrative
- Use different phrasing and sentence structure than the originals
- Never mention the news source names (ABC, CNN, NPR, etc.) in titles or summaries

Return JSON in this exact format:
{
  "groups": [
    {
      "title": "Your original headline for this story (max 80 chars)",
      "summary": "Your original summary synthesizing the story in your own words (max 500 chars)",
      "articleIds": [0, 1, 2],
      "category": "politics|business|technology|sports|entertainment|health|science|world|other"
    }
  ]
}
Only return valid JSON.`,
        },
        { role: 'user', content: JSON.stringify(articlesForAI) },
      ],
    })

    const aiResponse = JSON.parse(completion.choices[0].message.content)
    // Guard against a malformed model response that lacks a "groups" array.
    const aiGroups = Array.isArray(aiResponse.groups) ? aiResponse.groups : []
    console.log(`✓ OpenAI returned ${aiGroups.length} groups`)

    // Enrich groups with source articles and images
    const enrichedGroups = aiGroups.map((group) => {
      const groupArticles = (group.articleIds ?? [])
        .map((id) => allArticles[id])
        .filter(Boolean)
      const images = groupArticles
        .map((a) => a.image)
        .filter(Boolean)
      const sources = [...new Set(groupArticles.map((a) => a.source))]
      const links = groupArticles.map((a) => ({ title: a.title, link: a.link, source: a.source }))
      return {
        title: group.title,
        summary: group.summary,
        category: group.category,
        image: images[0] || null,
        sources,
        articles: links,
        articleCount: groupArticles.length,
      }
    })

    // Replace any source name mentions with [news]
    const sanitizedGroups = sanitizeGroups(enrichedGroups)

    // Cache the results
    setCachedGroupedNews(sanitizedGroups)
    console.log(`Cached ${sanitizedGroups.length} grouped news for 3 hours`)

    res.json({ groups: sanitizedGroups, cached: false })
  } catch (error) {
    console.error('Grouped news error:', error)
    res.status(500).json({ error: error.message })
  }
})

/**
 * Best-effort image extraction from an RSS item: enclosure url, then
 * media:content / media:thumbnail attributes, then the first <img src="...">
 * found in the item's HTML content.
 * @param {object} item - rss-parser feed item.
 * @returns {string|null} image URL, or null when none is found.
 */
function extractImage(item) {
  if (item.enclosure?.url) return item.enclosure.url
  // BUG FIX: also optional-chain "$" - rss-parser can deliver these nodes
  // without an attribute object, and item['media:content']?.$.url would throw.
  if (item['media:content']?.$?.url) return item['media:content'].$.url
  if (item['media:thumbnail']?.$?.url) return item['media:thumbnail'].$.url
  // BUG FIX: the previous pattern /]+src="..."/ required literal "]"
  // characters before src and never matched an <img> tag.
  const contentMatch = (item.content || '').match(/<img[^>]+src="([^"]+)"/i)
  if (contentMatch) return contentMatch[1]
  return null
}

app.listen(PORT, () => {
  console.log(`API server running on http://localhost:${PORT}`)
})