Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp \
int2 int4 int8 float4 float8 money oid \
time timetz date interval \
macaddr inet cidr text varchar char bytea bit varbit \
numeric rum_weight expr array
numeric rum_weight expr array rum_vacuum

TAP_TESTS = 1

Expand Down
154 changes: 154 additions & 0 deletions expected/rum_vacuum.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
-- Test RUM index scan correctness after concurrent VACUUM removes all
-- posting tree entry items.
SET enable_seqscan TO off;
SET enable_indexscan TO off;
SET enable_bitmapscan TO on;
CREATE TABLE test_rum_vacuum (id int, body tsvector);
ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false);
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i;
CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops);
DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999;
-- test normal result
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
-------+--------------------
10000 | 'great':1 'john':2
(1 row)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10000 | 'great':1 'john':2
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
10000 | 'great':1 'john':2
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

VACUUM test_rum_vacuum;
-- this shouldn't cause a core dump
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
-------+--------------------
10000 | 'great':1 'john':2
(1 row)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10000 | 'great':1 'john':2
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
10000 | 'great':1 'john':2
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

-- test that data can still be found after reinsertion
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i;
SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
count
-------
9998
(1 row)

DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999;
VACUUM test_rum_vacuum;
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
-------+--------------------
20000 | 'great':1 'john':2
(1 row)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
20000 | 'great':1 'john':2
(5 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
20000 | 'great':1 'john':2
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

-- test if do while loop works when an entry has no non-empty posting tree pages
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery;
VACUUM test_rum_vacuum;
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
----+------
(0 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(4 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(4 rows)
54 changes: 54 additions & 0 deletions sql/rum_vacuum.sql
Comment thread
buriedpot marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
-- Test RUM index scan correctness after concurrent VACUUM removes all
-- posting tree entry items.

SET enable_seqscan TO off;
SET enable_indexscan TO off;
SET enable_bitmapscan TO on;

CREATE TABLE test_rum_vacuum (id int, body tsvector);
ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false);

INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i;

CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops);

DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999;

-- test normal result
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;

VACUUM test_rum_vacuum;

-- this shouldn't cause a core dump
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;

-- test that data can still be found after reinsertion
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i;
SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999;

VACUUM test_rum_vacuum;

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;

-- test if do while loop works when an entry has no non-empty posting tree pages
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery;
VACUUM test_rum_vacuum;

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
97 changes: 90 additions & 7 deletions src/rumget.c
Original file line number Diff line number Diff line change
Expand Up @@ -684,9 +684,23 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
}

LockBuffer(entry->buffer, RUM_UNLOCK);
entry->isFinished = setListPositionScanEntry(rumstate, entry);
if (!entry->isFinished)
entry->curItem = entry->list[entry->offset];

/*
* If the current page is empty (nlist == 0), we cannot assume the
* scan is complete, as subsequent pages may exist. Therefore, we
* set isFinished = false and leave entry->nlist = 0 and
* entry->offset = 0 to ensure that entryGetItem advances to the
* next page on the next call. Otherwise, initialize curItem to
* the first valid item.
*/
if (entry->nlist == 0)
entry->isFinished = false;
else
{
entry->isFinished = setListPositionScanEntry(rumstate, entry);
if (!entry->isFinished)
entry->curItem = entry->list[entry->offset];
}
}
else if (RumGetNPosting(itup) > 0)
{
Expand All @@ -699,6 +713,16 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
if (!entry->isFinished)
entry->curItem = entry->list[entry->offset];
}
/*
* Else, the posting list for this entry has been entirely vacuumed
* away (nlist == 0 after setListPositionScanEntry). We cannot assume
* the scan is complete, as subsequent pages may exist. Therefore, we
* set isFinished = false and leave entry->nlist = 0 and entry->offset
* = 0 to ensure that entryGetItem advances to the next page on the
* next call.
*/
else
entry->isFinished = false;
Comment thread
buriedpot marked this conversation as resolved.

if (entry->queryCategory == RUM_CAT_EMPTY_QUERY &&
entry->scanWithAddInfo)
Expand Down Expand Up @@ -1011,6 +1035,13 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)

LockBuffer(entry->buffer, RUM_UNLOCK);

/*
* No valid item if VACUUM removed all items concurrently. Go on
* next page.
*/
if (entry->nlist == 0)
break;

if (entry->offset < 0)
{
if (ScanDirectionIsForward(entry->scanDirection) &&
Expand Down Expand Up @@ -1044,6 +1075,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
RumItemSetMin(&entry->curItem);
entry->offset = InvalidOffsetNumber;
entry->list = NULL;
entry->nlist = 0;
if (entry->gdi)
{
freeRumBtreeStack(entry->gdi->stack);
Expand Down Expand Up @@ -1151,6 +1183,18 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)

LockBuffer(entry->buffer, RUM_UNLOCK);
entry->isFinished = false;

/*
* Posting tree's first leaf page is empty due to concurrent VACUUM.
* Advance through empty pages until we find one with items or exhaust
* the tree. entryGetItem does not re-invoke entryGetNextItem after we
* return, so we must do it here to ensure curItem is valid on return.
*/
if (entry->nlist == 0)
{
entryGetNextItem(rumstate, entry, snapshot);
goto entry_done;
}
}
else if (RumGetNPosting(itup) > 0)
{
Expand All @@ -1161,12 +1205,21 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
rumReadTuple(rumstate, entry->attnum, itup, entry->list, true);
entry->isFinished = setListPositionScanEntry(rumstate, entry);
}
/* Posting list has been vacuumed. Go to the next entry. */
else
{
ItemPointerSetInvalid(&entry->curItem.iptr);
entry->isFinished = true;
goto entry_done;
}

Assert(entry->nlist > 0 && entry->list);

entry->curItem = entry->list[entry->offset];
entry->offset += entry->scanDirection;

entry_done:

SCAN_ENTRY_GET_KEY(entry, rumstate, itup);

/*
Expand Down Expand Up @@ -1340,8 +1393,24 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps
}
else if (entry->stack)
{
entry->offset++;
if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList)
/*
* We are responsible for ensuring that we keep advancing through
* ItemLists until we find one that contains at least one valid
* item. This is necessary because concurrent VACUUM may have
* removed all items from a page, leaving an empty ItemList. In
* such cases, we must continue to the next ItemList.
*/
bool success;

Assert(!entry->isFinished);

do
{
entry->isFinished = false;
success = entryGetNextItemList(rumstate, entry, snapshot);
} while (success && entry->nlist == 0);

if (success && nextEntryList)
*nextEntryList = true;
}
else
Expand All @@ -1361,8 +1430,22 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps
dropItem(entry));
if (entry->stack && entry->isFinished)
{
entry->isFinished = false;
if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList)
/*
* We are responsible for ensuring that we keep advancing through
* ItemLists until we find one that contains at least one valid
* item. This is necessary because concurrent VACUUM may have
* removed all items from a page, leaving an empty ItemList. In
* such cases, we must continue to the next ItemList.
*/
bool success;

do
{
entry->isFinished = false;
success = entryGetNextItemList(rumstate, entry, snapshot);
} while (success && entry->nlist == 0);

if (success && nextEntryList)
*nextEntryList = true;
}
}
Expand Down