diff --git a/internal/auth/auth.go b/internal/auth/auth.go
index 5aa17157..cb25daf3 100644
--- a/internal/auth/auth.go
+++ b/internal/auth/auth.go
@@ -546,6 +546,7 @@ func IsBanned(accountID string) bool {
 // BanAccount silently mutes a user. Their content is hidden from
 // all other users, but they can still browse and post (to themselves).
+// Admins can never be banned — this is a hard safety guard.
 func BanAccount(accountID string) error {
 	mutex.Lock()
 	defer mutex.Unlock()
@@ -553,6 +554,9 @@ func BanAccount(accountID string) error {
 	if !exists {
 		return errors.New("account not found")
 	}
+	if acc.Admin {
+		return errors.New("cannot ban an admin account")
+	}
 	acc.Banned = true
 	data.SaveJSON("accounts.json", accounts)
 	return nil
diff --git a/internal/flag/flag.go b/internal/flag/flag.go
index c71f470c..ea288603 100644
--- a/internal/flag/flag.go
+++ b/internal/flag/flag.go
@@ -83,13 +83,12 @@ func CheckContent(contentType, itemID, title, content string) {
 	prompt := `You are a strict content moderator for a family-friendly community.
 Every post should be meaningful and respectful. This is not a place to waste
 time, troll, or post crude content. Classify the content with ONLY ONE WORD:
 
-- SPAM (promotional spam, advertising, repetitive junk)
-- TEST (test posts like "test", "hello world", meaningless typing)
-- LOW_QUALITY (low-effort, memes, nonsensical, no substance, gibberish, single words)
-- HARMFUL (vulgar, crude, sexual, obscene, gossip, slander, personal attacks, mocking, trolling, shock content)
-- OK (meaningful, on-topic, respectful content that adds value)
+- SPAM (promotional spam, advertising, repetitive junk, SEO content)
+- LOW_QUALITY (gibberish, random characters, meaningless typing like "asdf", single letters)
+- HARMFUL (vulgar, crude, sexual, obscene, gossip, slander, personal attacks, mocking, trolling, shock content, swear words)
+- OK (everything else — status updates, opinions, questions, short messages, work updates, casual conversation)
 
-When in doubt, flag it. Better to flag something borderline than let inappropriate content through.
+IMPORTANT: Short personal status updates like "Working on X", "Good morning", "Just shipped Y", "Having lunch" are ALWAYS OK. They are normal status messages, not spam or low quality. Only flag content that is clearly abusive, vulgar, or spam. When in doubt, say OK.
 
 Respond with just the single word.`
@@ -104,7 +103,7 @@ Respond with just the single word.`
 	resp = strings.TrimSpace(strings.ToUpper(resp))
 	fmt.Printf("Content moderation: %s %s -> %s\n", contentType, itemID, resp)
 
-	if resp == "SPAM" || resp == "TEST" || resp == "LOW_QUALITY" || resp == "HARMFUL" {
+	if resp == "SPAM" || resp == "LOW_QUALITY" || resp == "HARMFUL" {
 		// System auto-flag immediately hides the content — do NOT wait for
 		// 3 user flags. Otherwise spam stays visible until users find it.
 		AdminFlag(contentType, itemID, "system:"+strings.ToLower(resp))
diff --git a/user/user.go b/user/user.go
index 0b42d56e..5d03430b 100644
--- a/user/user.go
+++ b/user/user.go
@@ -427,7 +427,11 @@ func StatusHandler(w http.ResponseWriter, r *http.Request) {
 		status = status[:MaxStatusLength]
 	}
 
-	UpdateStatus(sess.Account, status)
+	if err := UpdateStatus(sess.Account, status); err != nil {
+		app.Log("status", "UpdateStatus failed for %s: %v", sess.Account, err)
+	} else {
+		app.Log("status", "Status updated for %s: %q", sess.Account, status)
+	}
 
 	// Async content moderation — flags spam/test/harmful automatically
 	// and auto-bans the user if it's bad. Fire-and-forget.
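Review note on the flag.go hunk above: the `==` comparisons assume the model really does answer with a single bare word. A reply like "OK." or "HARMFUL - contains slurs" falls through every branch and the content passes unflagged. Below is a minimal, self-contained sketch of a more tolerant mapping; `classifyVerdict`, the label ordering, and the sample replies are all invented for this illustration and are not part of the PR.

```go
// Sketch only: tolerant verdict parsing for CheckContent-style replies.
package main

import (
	"fmt"
	"strings"
)

// Labels the prompt allows, checked in severity order so a reply that
// mentions several still maps to the most severe one.
var labels = []string{"HARMFUL", "SPAM", "LOW_QUALITY", "OK"}

// classifyVerdict maps a raw LLM reply to one known label, defaulting
// to "OK" to mirror the PR's "when in doubt, say OK" policy.
// Note: substring matching is crude (a reply saying "NOT SPAM" still
// maps to SPAM); this is a sketch, not production logic.
func classifyVerdict(resp string) string {
	resp = strings.ToUpper(strings.TrimSpace(resp))
	for _, l := range labels {
		if strings.Contains(resp, l) {
			return l
		}
	}
	return "OK"
}

func main() {
	for _, resp := range []string{"OK", "ok.", "SPAM", "HARMFUL - contains slurs", "no idea"} {
		fmt.Printf("%-28q -> %s\n", resp, classifyVerdict(resp))
	}
}
```

Because the exact-match branch silently treats any unrecognized reply as clean, a normalization step like this fails closed on phrasing drift rather than open.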
@@ -479,12 +483,11 @@ func PostSystemStatus(text string) error {
 // The user is never told they've been muted — from their perspective
 // everything looks normal.
 func moderateStatus(accountID, text string) {
+	// Never moderate or ban admins.
+	if acc, err := auth.GetAccount(accountID); err == nil && acc.Admin {
+		return
+	}
 	flag.CheckContent("status", accountID, "", text)
-	// CheckContent already calls AdminFlag on detection, which hides
-	// the individual piece. But for status we want escalation: if the
-	// LLM says SPAM/HARMFUL, ban the entire account. We can
-	// piggyback on the same LLM result by checking whether the flag
-	// was set within the last second (i.e. we just created it).
 	item := flag.GetItem("status", accountID)
 	if item != nil && item.Flagged {
 		app.Log("moderation", "Auto-banning %s after status flagged", accountID)
@@ -492,10 +495,13 @@ func moderateStatus(accountID, text string) {
 	}
 }
 
-// moderateAIResponse checks an AI-generated response BEFORE it's posted
+// ModerateAIResponse checks an AI-generated response BEFORE it's posted
 // as a status. Returns true if the response is safe to post. If the
-// content is flagged, the requesting user is banned.
+// content is flagged, the requesting user is banned (admins are exempt).
 func ModerateAIResponse(askerID, response string) bool {
+	if acc, err := auth.GetAccount(askerID); err == nil && acc.Admin {
+		return true
+	}
 	flag.CheckContent("ai_response", askerID, "", response)
 	item := flag.GetItem("ai_response", askerID)
 	if item != nil && item.Flagged {
@@ -806,6 +812,7 @@ func envInt(key string, def int) int {
 // the home card can share one code path.
 func RenderStatusStream(viewerID string) string {
 	entries := StatusStream(StatusStreamMax)
+	app.Log("status", "RenderStatusStream: %d entries for viewer %s", len(entries), viewerID)
 	var sb strings.Builder
 
 	if viewerID != "" {
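Review note on the escalation pattern in moderateStatus and ModerateAIResponse: both call flag.CheckContent for its side effect, then re-read flag.GetItem to learn what just happened. The comment this PR deletes admitted the coupling ("piggyback on the same LLM result"). Here is a sketch of the alternative, where the check returns its verdict and the caller escalates directly. Every name in it (Verdict, checkContent, classify, banAccount) is a stand-in invented for the illustration; the PR itself keeps CheckContent's void signature.

```go
// Sketch only: return the verdict instead of re-reading the flag store.
package main

import "fmt"

type Verdict string

const (
	VerdictOK      Verdict = "OK"
	VerdictSpam    Verdict = "SPAM"
	VerdictHarmful Verdict = "HARMFUL"
)

// checkContent classifies and flags in one step, then hands the verdict
// back so callers can escalate without a second lookup.
func checkContent(contentType, itemID, text string) Verdict {
	v := classify(text) // imagine the LLM call inside flag.CheckContent
	if v != VerdictOK {
		fmt.Printf("flagging %s %s: %s\n", contentType, itemID, v)
	}
	return v
}

// classify is a toy stand-in for the moderation model.
func classify(text string) Verdict {
	if text == "buy pills now" {
		return VerdictSpam
	}
	return VerdictOK
}

func banAccount(id string) { fmt.Println("banned:", id) }

// moderateStatus escalates on the returned verdict directly,
// mirroring the PR's hard admin exemption.
func moderateStatus(accountID, text string, isAdmin bool) {
	if isAdmin {
		return
	}
	if checkContent("status", accountID, text) != VerdictOK {
		banAccount(accountID)
	}
}

func main() {
	moderateStatus("u1", "buy pills now", false)
	moderateStatus("admin", "buy pills now", true)
}
```

Returning the verdict would also avoid a stale-flag hazard: since GetItem is keyed by accountID, a status flagged long ago would, if flags persist, re-trigger the ban on every later post regardless of its content.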