Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions internal/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,13 +546,17 @@ func IsBanned(accountID string) bool {

// BanAccount silently mutes a user. Their content is hidden from
// all other users, but they can still browse and post (to themselves).
// Admins can never be banned — this is a hard safety guard.
func BanAccount(accountID string) error {
mutex.Lock()
defer mutex.Unlock()
acc, exists := accounts[accountID]
if !exists {
return errors.New("account not found")
}
if acc.Admin {
return errors.New("cannot ban an admin account")
}
acc.Banned = true
data.SaveJSON("accounts.json", accounts)
return nil
Expand Down
13 changes: 6 additions & 7 deletions internal/flag/flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,12 @@ func CheckContent(contentType, itemID, title, content string) {
prompt := `You are a strict content moderator for a family-friendly community. Every post should be meaningful and respectful. This is not a place to waste time, troll, or post crude content.

Classify the content with ONLY ONE WORD:
- SPAM (promotional spam, advertising, repetitive junk)
- TEST (test posts like "test", "hello world", meaningless typing)
- LOW_QUALITY (low-effort, memes, nonsensical, no substance, gibberish, single words)
- HARMFUL (vulgar, crude, sexual, obscene, gossip, slander, personal attacks, mocking, trolling, shock content)
- OK (meaningful, on-topic, respectful content that adds value)
- SPAM (promotional spam, advertising, repetitive junk, SEO content)
- LOW_QUALITY (gibberish, random characters, meaningless typing like "asdf", single letters)
- HARMFUL (vulgar, crude, sexual, obscene, gossip, slander, personal attacks, mocking, trolling, shock content, swear words)
- OK (everything else — status updates, opinions, questions, short messages, work updates, casual conversation)

When in doubt, flag it. Better to flag something borderline than let inappropriate content through.
IMPORTANT: Short personal status updates like "Working on X", "Good morning", "Just shipped Y", "Having lunch" are ALWAYS OK. They are normal status messages, not spam or low quality. Only flag content that is clearly abusive, vulgar, or spam. When in doubt, say OK.

Respond with just the single word.`

Expand All @@ -104,7 +103,7 @@ Respond with just the single word.`
resp = strings.TrimSpace(strings.ToUpper(resp))
fmt.Printf("Content moderation: %s %s -> %s\n", contentType, itemID, resp)

if resp == "SPAM" || resp == "TEST" || resp == "LOW_QUALITY" || resp == "HARMFUL" {
if resp == "SPAM" || resp == "LOW_QUALITY" || resp == "HARMFUL" {
// System auto-flag immediately hides the content — do NOT wait for
// 3 user flags. Otherwise spam stays visible until users find it.
AdminFlag(contentType, itemID, "system:"+strings.ToLower(resp))
Expand Down
23 changes: 15 additions & 8 deletions user/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,11 @@ func StatusHandler(w http.ResponseWriter, r *http.Request) {
status = status[:MaxStatusLength]
}

UpdateStatus(sess.Account, status)
if err := UpdateStatus(sess.Account, status); err != nil {
app.Log("status", "UpdateStatus failed for %s: %v", sess.Account, err)
} else {
app.Log("status", "Status updated for %s: %q", sess.Account, status)
}

// Async content moderation — flags spam/low-quality/harmful content
// automatically and auto-bans the user if it's flagged. Fire-and-forget.
Expand Down Expand Up @@ -479,23 +483,25 @@ func PostSystemStatus(text string) error {
// moderateStatus runs LLM content moderation on a user's status and
// escalates a flagged result to a full account ban. Admin accounts are
// exempt: they are neither moderated nor banned.
// The user is never told they've been muted — from their perspective
// everything looks normal.
func moderateStatus(accountID, text string) {
	// Never moderate or ban admins.
	if acc, err := auth.GetAccount(accountID); err == nil && acc.Admin {
		return
	}
	flag.CheckContent("status", accountID, "", text)
	// CheckContent already calls AdminFlag on detection, which hides the
	// individual piece. For statuses we escalate: re-read the item and,
	// if the LLM flagged it (SPAM/LOW_QUALITY/HARMFUL), ban the whole
	// account. NOTE: this relies on CheckContent running synchronously
	// before GetItem — there is no timestamp check here.
	item := flag.GetItem("status", accountID)
	if item != nil && item.Flagged {
		app.Log("moderation", "Auto-banning %s after status flagged", accountID)
		// BanAccount can fail (account vanished, or — per the new guard —
		// it is an admin). Don't silently drop that error.
		if err := auth.BanAccount(accountID); err != nil {
			app.Log("moderation", "BanAccount failed for %s: %v", accountID, err)
		}
	}
}

// moderateAIResponse checks an AI-generated response BEFORE it's posted
// ModerateAIResponse checks an AI-generated response BEFORE it's posted
// as a status. Returns true if the response is safe to post. If the
// content is flagged, the requesting user is banned.
// content is flagged, the requesting user is banned (admins are exempt).
func ModerateAIResponse(askerID, response string) bool {
if acc, err := auth.GetAccount(askerID); err == nil && acc.Admin {
return true
}
flag.CheckContent("ai_response", askerID, "", response)
item := flag.GetItem("ai_response", askerID)
if item != nil && item.Flagged {
Expand Down Expand Up @@ -806,6 +812,7 @@ func envInt(key string, def int) int {
// the home card can share one code path.
func RenderStatusStream(viewerID string) string {
entries := StatusStream(StatusStreamMax)
app.Log("status", "RenderStatusStream: %d entries for viewer %s", len(entries), viewerID)

var sb strings.Builder
if viewerID != "" {
Expand Down
Loading