vladkens · vladkens · Jun 8, 2026 · Jun 1, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -544,6 +544,22 @@ async def test_issue_42():
     assert doc.rawContent.endswith(doc.retweetedTweet.rawContent)
 
 
+def test_retweet_not_duplicated():
+    """parse_tweets must not emit the tweet nested inside a retweet a second
+    time as a standalone top-level item."""
+    wrapper_id = 1665951747842641921  # id the test looks up as the RT wrapper
+    parsed = list(parse_tweets(fake_rep("_issue_42").json()))
+
+    matches = [tw for tw in parsed if tw.id == wrapper_id]
+    wrapper = matches[0] if matches else None
+    assert wrapper is not None, "RT wrapper tweet not found"
+    assert wrapper.retweetedTweet is not None
+
+    nested_id = wrapper.retweetedTweet.id
+    leaked = any(tw.id == nested_id for tw in parsed)
+    assert not leaked, f"retweetedTweet {nested_id} leaked as a standalone top-level item"
+
+
 async def test_issue_56():
     raw = fake_rep("_issue_56").json()
     doc = parse_tweet(raw, 1682072224013099008)
@@ -552,6 +568,24 @@ async def test_issue_56():
     assert len(doc.links) == 5
 
 
+async def test_issue_310():
+    """No tweet returned by user_tweets may also be some retweet's retweetedTweet."""
+    api = get_api()
+    mock_rep(api.user_tweets_raw, "raw_user_tweets", as_generator=True)
+    docs = await gather(api.user_tweets(2244994945))
+
+    retweets = [doc for doc in docs if doc.retweetedTweet is not None]
+    seen = {doc.id for doc in docs}
+    nested = {rt.retweetedTweet.id for rt in retweets}
+    leaked = seen.intersection(nested)
+
+    assert nested  # the fixture has to contain at least one retweet
+    assert not leaked, (
+        f"top_level={len(seen)}, retweets={len(retweets)}, "
+        f"retweeted_children={len(nested)}, leaked={len(leaked)}"
+    )
+
+
 async def test_cards():
     # Issues:
     # - https://github.com/vladkens/twscrape/issues/72

diff --git a/twscrape/models.py b/twscrape/models.py
@@ -822,9 +822,12 @@ def _parse_items(rep: Response, kind: str, limit: int = -1):
     # check for dict, because Response can be mocked in tests with different type
     res = rep if isinstance(rep, dict) else rep.json()
     obj = to_old_rep(res)
+    retweeted_ids: set[str] = obj.get("retweeted_ids", set())
 
     ids = set()
     for x in obj[key].values():
+        if kind == "tweet" and x.get("id_str") in retweeted_ids:
+            continue
         if limit != -1 and len(ids) >= limit:
             # todo: move somewhere in configuration like force_limit
             # https://github.com/vladkens/twscrape/issues/26#issuecomment-1656875132

diff --git a/twscrape/utils.py b/twscrape/utils.py
@@ -239,7 +239,7 @@ def to_old_obj(obj: dict):
     return _flatten_tweet_v2(obj)
 
 
-def to_old_rep(obj: dict) -> dict[str, dict]:
+def to_old_rep(obj: dict) -> dict[str, Any]:
     tmp = get_typed_object(obj, defaultdict(list))
 
     # "legacy" in x still matches under the new schema: the key is present
@@ -291,7 +291,19 @@ def _to_old_user(obj: dict) -> dict | None:
     trends = list(tmp.get("TimelineTrend", []))
     trends = {x["name"]: x for x in trends}
 
-    return {"tweets": {**tw1, **tw2}, "users": users, "trends": trends}
+    tweets = {**tw1, **tw2}
+    retweeted_ids = {
+        str(retweeted_id)
+        for tweet in tweets.values()
+        for path in (
+            "retweeted_status_id_str",
+            "retweeted_status_result.result.rest_id",
+            "retweeted_status_result.result.tweet.rest_id",
+        )
+        if (retweeted_id := get_or(tweet, path)) is not None
+    }
+
+    return {"tweets": tweets, "retweeted_ids": retweeted_ids, "users": users, "trends": trends}
 
 
 def print_table(rows: list[dict], hr_after=False):