diff -U3 /tmp/cirrus-ci-build/src/test/regress/expected/join_hash_pglz.out /tmp/cirrus-ci-build/src/test/regress/results/join_hash_pglz.out
--- /tmp/cirrus-ci-build/src/test/regress/expected/join_hash_pglz.out	2025-07-07 22:05:14.532847304 +0000
+++ /tmp/cirrus-ci-build/src/test/regress/results/join_hash_pglz.out	2025-07-07 22:09:21.604365209 +0000
@@ -606,561 +606,10 @@
     left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
     on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
 $$);
- multibatch
-------------
- t
-(1 row)
-
-rollback to settings;
--- single-batch with rescan, parallel-oblivious
-savepoint settings;
-set enable_parallel_hash = off;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '4MB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Hash
-                           ->  Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- f
-(1 row)
-
-rollback to settings;
--- multi-batch with rescan, parallel-aware
-savepoint settings;
-set enable_parallel_hash = on;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '64kB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Parallel Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- t
-(1 row)
-
-rollback to settings;
--- single-batch with rescan, parallel-aware
-savepoint settings;
-set enable_parallel_hash = on;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '4MB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Parallel Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- f
-(1 row)
-
-rollback to settings;
--- A full outer join where every record is matched.
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-explain (costs off)
-     select count(*) from simple r full outer join simple s using (id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on simple s
-(6 rows)
-
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- parallelism not possible with parallel-oblivious full hash join
-savepoint settings;
-set enable_parallel_hash = off;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s using (id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on simple s
-(6 rows)
-
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- parallelism is possible with parallel-aware full hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s using (id);
-                         QUERY PLAN
---------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Full Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on simple s
-(9 rows)
-
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- A full outer join where every record is not matched.
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-explain (costs off)
-     select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: ((0 - s.id) = r.id)
-         ->  Seq Scan on simple s
-         ->  Hash
-               ->  Seq Scan on simple r
-(6 rows)
-
-select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
- count
--------
- 40000
-(1 row)
-
-rollback to settings;
--- parallelism not possible with parallel-oblivious full hash join
-savepoint settings;
-set enable_parallel_hash = off;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: ((0 - s.id) = r.id)
-         ->  Seq Scan on simple s
-         ->  Hash
-               ->  Seq Scan on simple r
-(6 rows)
-
-select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
- count
--------
- 40000
-(1 row)
-
-rollback to settings;
--- parallelism is possible with parallel-aware full hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
-                         QUERY PLAN
---------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Full Join
-                     Hash Cond: ((0 - s.id) = r.id)
-                     ->  Parallel Seq Scan on simple s
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on simple r
-(9 rows)
-
-select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
- count
--------
- 40000
-(1 row)
-
-rollback to settings;
--- exercise special code paths for huge tuples (note use of non-strict
--- expression and left join required to get the detoasted tuple into
--- the hash table)
--- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
--- sts_puttuple oversized tuple cases because it's multi-batch)
-savepoint settings;
-set max_parallel_workers_per_gather = 2;
-set enable_parallel_hash = on;
-set work_mem = '128kB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
-                           QUERY PLAN
------------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Left Join
-                     Hash Cond: (wide.id = wide_1.id)
-                     ->  Parallel Seq Scan on wide
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on wide wide_1
-(9 rows)
-
-select length(max(s.t))
-from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
- length
---------
- 320000
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
-$$);
- multibatch
-------------
- t
-(1 row)
-
-rollback to settings;
--- Hash join reuses the HOT status bit to indicate match status. This can only
--- be guaranteed to produce correct results if all the hash join tuple match
--- bits are reset before reuse. This is done upon loading them into the
--- hashtable.
-SAVEPOINT settings;
-SET enable_parallel_hash = on;
-SET min_parallel_table_scan_size = 0;
-SET parallel_setup_cost = 0;
-SET parallel_tuple_cost = 0;
-CREATE TABLE hjtest_matchbits_t1(id int);
-CREATE TABLE hjtest_matchbits_t2(id int);
-INSERT INTO hjtest_matchbits_t1 VALUES (1);
-INSERT INTO hjtest_matchbits_t2 VALUES (2);
--- Update should create a HOT tuple. If this status bit isn't cleared, we won't
--- correctly emit the NULL-extended unmatching tuple in full hash join.
-UPDATE hjtest_matchbits_t2 set id = 2;
-SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id
-  ORDER BY t1.id;
- id | id
-----+----
-  1 |
-    |  2
-(2 rows)
-
--- Test serial full hash join.
--- Resetting parallel_setup_cost should force a serial plan.
--- Just to be safe, however, set enable_parallel_hash to off, as parallel full
--- hash joins are only supported with shared hashtables.
-RESET parallel_setup_cost;
-SET enable_parallel_hash = off;
-SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
- id | id
-----+----
-  1 |
-    |  2
-(2 rows)
-
-ROLLBACK TO settings;
-rollback;
--- Verify that hash key expressions reference the correct
--- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
--- need to reference Hash's outer plan (which is below HashJoin's
--- inner plan). It's not trivial to verify that the references are
--- correct (we don't display the hashkeys themselves), but if the
--- hashkeys contain subplan references, those will be displayed. Force
--- subplans to appear just about everywhere.
---
--- Bug report:
--- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
---
-BEGIN;
-SET LOCAL enable_sort = OFF; -- avoid mergejoins
-SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order
-CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
-CREATE TABLE hjtest_2 (a bool, id int, b text, c int);
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;
-EXPLAIN (COSTS OFF, VERBOSE)
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_1, hjtest_2
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-                                           QUERY PLAN
-------------------------------------------------------------------------------------------------
- Hash Join
-   Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
-   Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3)))
-   Join Filter: (hjtest_1.a <> hjtest_2.b)
-   ->  Seq Scan on public.hjtest_1
-         Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-         Filter: ((SubPlan 4) < 50)
-         SubPlan 4
-           ->  Result
-                 Output: (hjtest_1.b * 5)
-   ->  Hash
-         Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-         ->  Seq Scan on public.hjtest_2
-               Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-               Filter: ((SubPlan 5) < 55)
-               SubPlan 5
-                 ->  Result
-                       Output: (hjtest_2.c * 5)
-   SubPlan 1
-     ->  Result
-           Output: 1
-           One-Time Filter: (hjtest_2.id = 1)
-   SubPlan 3
-     ->  Result
-           Output: (hjtest_2.c * 5)
-   SubPlan 2
-     ->  Result
-           Output: (hjtest_1.b * 5)
-(28 rows)
-
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_1, hjtest_2
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-  a1  | a2 |    t1    |    t2
-------+----+----------+----------
- text | t  | hjtest_1 | hjtest_2
-(1 row)
-
-EXPLAIN (COSTS OFF, VERBOSE)
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_2, hjtest_1
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-                                           QUERY PLAN
-------------------------------------------------------------------------------------------------
- Hash Join
-   Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
-   Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
-   Join Filter: (hjtest_1.a <> hjtest_2.b)
-   ->  Seq Scan on public.hjtest_2
-         Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-         Filter: ((SubPlan 5) < 55)
-         SubPlan 5
-           ->  Result
-                 Output: (hjtest_2.c * 5)
-   ->  Hash
-         Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-         ->  Seq Scan on public.hjtest_1
-               Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-               Filter: ((SubPlan 4) < 50)
-               SubPlan 4
-                 ->  Result
-                       Output: (hjtest_1.b * 5)
-   SubPlan 2
-     ->  Result
-           Output: (hjtest_1.b * 5)
-   SubPlan 1
-     ->  Result
-           Output: 1
-           One-Time Filter: (hjtest_2.id = 1)
-   SubPlan 3
-     ->  Result
-           Output: (hjtest_2.c * 5)
-(28 rows)
-
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_2, hjtest_1
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-  a1  | a2 |    t1    |    t2
-------+----+----------+----------
- text | t  | hjtest_1 | hjtest_2
-(1 row)
-
-ROLLBACK;
--- Verify that we behave sanely when the inner hash keys contain parameters
--- (that is, outer or lateral references). This situation has to defeat
--- re-use of the inner hash table across rescans.
-begin;
-set local enable_hashjoin = on;
-explain (costs off)
-select i8.q2, ss.* from
-int8_tbl i8,
-lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
-         on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
-                         QUERY PLAN
------------------------------------------------------------
- Nested Loop
-   ->  Seq Scan on int8_tbl i8
-   ->  Sort
-         Sort Key: t1.fivethous, i4.f1
-         ->  Hash Join
-               Hash Cond: (t1.fivethous = (i4.f1 + i8.q2))
-               ->  Seq Scan on tenk1 t1
-               ->  Hash
-                     ->  Seq Scan on int4_tbl i4
-(9 rows)
-
-select i8.q2, ss.* from
-int8_tbl i8,
-lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
-         on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
- q2  | fivethous | f1
------+-----------+----
- 456 |       456 |  0
- 456 |       456 |  0
- 123 |       123 |  0
- 123 |       123 |  0
-(4 rows)
-
-rollback;
+WARNING:  terminating connection because of crash of another server process
+DETAIL:  The postmaster has commanded this server process to roll back the current transaction and exit, because another server process exited abnormally and possibly corrupted shared memory.
+HINT:  In a moment you should be able to reconnect to the database and repeat your command.
+server closed the connection unexpectedly
+	This probably means the server terminated abnormally
+	before or while processing the request.
+connection to server was lost
diff -U3 /tmp/cirrus-ci-build/src/test/regress/expected/join_hash_lz4.out /tmp/cirrus-ci-build/src/test/regress/results/join_hash_lz4.out
--- /tmp/cirrus-ci-build/src/test/regress/expected/join_hash_lz4.out	2025-07-07 22:05:14.532847304 +0000
+++ /tmp/cirrus-ci-build/src/test/regress/results/join_hash_lz4.out	2025-07-07 22:09:21.644365173 +0000
@@ -1,1166 +1,2 @@
---
--- exercises for the hash join code
---
-begin;
-set local min_parallel_table_scan_size = 0;
-set local parallel_setup_cost = 0;
-set local enable_hashjoin = on;
-set local temp_file_compression = 'lz4';
--- Extract bucket and batch counts from an explain analyze plan. In
--- general we can't make assertions about how many batches (or
--- buckets) will be required because it can vary, but we can in some
--- special cases and we can check for growth.
-create or replace function find_hash(node json)
-returns json language plpgsql
-as
-$$
-declare
-  x json;
-  child json;
-begin
-  if node->>'Node Type' = 'Hash' then
-    return node;
-  else
-    for child in select json_array_elements(node->'Plans')
-    loop
-      x := find_hash(child);
-      if x is not null then
-        return x;
-      end if;
-    end loop;
-    return null;
-  end if;
-end;
-$$;
-create or replace function hash_join_batches(query text)
-returns table (original int, final int) language plpgsql
-as
-$$
-declare
-  whole_plan json;
-  hash_node json;
-begin
-  for whole_plan in
-    execute 'explain (analyze, format ''json'') ' || query
-  loop
-    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
-    original := hash_node->>'Original Hash Batches';
-    final := hash_node->>'Hash Batches';
-    return next;
-  end loop;
-end;
-$$;
--- Make a simple relation with well distributed keys and correctly
--- estimated size.
-create table simple as
-  select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
-alter table simple set (parallel_workers = 2);
-analyze simple;
--- Make a relation whose size we will under-estimate. We want stats
--- to say 1000 rows, but actually there are 20,000 rows.
-create table bigger_than_it_looks as
-  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
-alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
-alter table bigger_than_it_looks set (parallel_workers = 2);
-analyze bigger_than_it_looks;
-update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';
--- Make a relation whose size we underestimate and that also has a
--- kind of skew that breaks our batching scheme. We want stats to say
--- 2 rows, but actually there are 20,000 rows with the same key.
-create table extremely_skewed (id int, t text);
-alter table extremely_skewed set (autovacuum_enabled = 'false');
-alter table extremely_skewed set (parallel_workers = 2);
-analyze extremely_skewed;
-insert into extremely_skewed
-  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
-  from generate_series(1, 20000);
-update pg_class
-  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
-  where relname = 'extremely_skewed';
--- Make a relation with a couple of enormous tuples.
-create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
-alter table wide set (parallel_workers = 2);
--- The "optimal" case: the hash table fits in memory; we plan for 1
--- batch, we stick to that number, and peak memory usage stays within
--- our work_mem budget
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-set local work_mem = '4MB';
-set local hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from simple r join simple s using (id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on simple s
-(6 rows)
-
-select count(*) from simple r join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- f                    | f
-(1 row)
-
-rollback to settings;
--- parallel with parallel-oblivious hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '4MB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = off;
-explain (costs off)
-  select count(*) from simple r join simple s using (id);
-                      QUERY PLAN
--------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Hash
-                           ->  Seq Scan on simple s
-(9 rows)
-
-select count(*) from simple r join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- f                    | f
-(1 row)
-
-rollback to settings;
--- parallel with parallel-aware hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '4MB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = on;
-explain (costs off)
-  select count(*) from simple r join simple s using (id);
-                         QUERY PLAN
--------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on simple s
-(9 rows)
-
-select count(*) from simple r join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- f                    | f
-(1 row)
-
-rollback to settings;
--- The "good" case: batches required, but we plan the right number; we
--- plan for some number of batches, and we stick to that number, and
--- peak memory usage says within our work_mem budget
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from simple r join simple s using (id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on simple s
-(6 rows)
-
-select count(*) from simple r join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- t                    | f
-(1 row)
-
-rollback to settings;
--- parallel with parallel-oblivious hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = off;
-explain (costs off)
-  select count(*) from simple r join simple s using (id);
-                      QUERY PLAN
--------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Hash
-                           ->  Seq Scan on simple s
-(9 rows)
-
-select count(*) from simple r join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- t                    | f
-(1 row)
-
-rollback to settings;
--- parallel with parallel-aware hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '192kB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = on;
-explain (costs off)
-  select count(*) from simple r join simple s using (id);
-                         QUERY PLAN
--------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on simple s
-(9 rows)
-
-select count(*) from simple r join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- t                    | f
-(1 row)
-
--- parallel full multi-batch hash join
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- The "bad" case: during execution we need to increase number of
--- batches; in this case we plan for 1 batch, and increase at least a
--- couple of times, and peak memory usage stays within our work_mem
--- budget
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
-                      QUERY PLAN
-------------------------------------------------------
- Aggregate
-   ->  Hash Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on bigger_than_it_looks s
-(6 rows)
-
-select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- f                    | t
-(1 row)
-
-rollback to settings;
--- parallel with parallel-oblivious hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = off;
-explain (costs off)
-  select count(*) from simple r join bigger_than_it_looks s using (id);
-                            QUERY PLAN
-------------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Hash
-                           ->  Seq Scan on bigger_than_it_looks s
-(9 rows)
-
-select count(*) from simple r join bigger_than_it_looks s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join bigger_than_it_looks s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- f                    | t
-(1 row)
-
-rollback to settings;
--- parallel with parallel-aware hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 1;
-set local work_mem = '192kB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = on;
-explain (costs off)
-  select count(*) from simple r join bigger_than_it_looks s using (id);
-                                QUERY PLAN
----------------------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 1
-         ->  Partial Aggregate
-               ->  Parallel Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on bigger_than_it_looks s
-(9 rows)
-
-select count(*) from simple r join bigger_than_it_looks s using (id);
- count
--------
- 20000
-(1 row)
-
-select original > 1 as initially_multibatch, final > original as increased_batches
-  from hash_join_batches(
-$$
-  select count(*) from simple r join bigger_than_it_looks s using (id);
-$$);
- initially_multibatch | increased_batches
-----------------------+-------------------
- f                    | t
-(1 row)
-
-rollback to settings;
--- The "ugly" case: increasing the number of batches during execution
--- doesn't help, so stop trying to fit in work_mem and hope for the
--- best; in this case we plan for 1 batch, increases just once and
--- then stop increasing because that didn't help at all, so we blow
--- right through the work_mem budget and hope for the best...
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from simple r join extremely_skewed s using (id);
-                    QUERY PLAN
---------------------------------------------------
- Aggregate
-   ->  Hash Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on extremely_skewed s
-(6 rows)
-
-select count(*) from simple r join extremely_skewed s using (id);
- count
--------
- 20000
-(1 row)
-
-select * from hash_join_batches(
-$$
-  select count(*) from simple r join extremely_skewed s using (id);
-$$);
- original | final
-----------+-------
-        1 |     2
-(1 row)
-
-rollback to settings;
--- parallel with parallel-oblivious hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = off;
-explain (costs off)
-  select count(*) from simple r join extremely_skewed s using (id);
-                       QUERY PLAN
---------------------------------------------------------
- Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Hash Join
-               Hash Cond: (r.id = s.id)
-               ->  Parallel Seq Scan on simple r
-               ->  Hash
-                     ->  Seq Scan on extremely_skewed s
-(8 rows)
-
-select count(*) from simple r join extremely_skewed s using (id);
- count
--------
- 20000
-(1 row)
-
-select * from hash_join_batches(
-$$
-  select count(*) from simple r join extremely_skewed s using (id);
-$$);
- original | final
-----------+-------
-        1 |     2
-(1 row)
-
-rollback to settings;
--- parallel with parallel-aware hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 1;
-set local work_mem = '128kB';
-set local hash_mem_multiplier = 1.0;
-set local enable_parallel_hash = on;
-explain (costs off)
-  select count(*) from simple r join extremely_skewed s using (id);
-                           QUERY PLAN
------------------------------------------------------------------
- Aggregate
-   ->  Gather
-         Workers Planned: 1
-         ->  Parallel Hash Join
-               Hash Cond: (r.id = s.id)
-               ->  Parallel Seq Scan on simple r
-               ->  Parallel Hash
-                     ->  Parallel Seq Scan on extremely_skewed s
-(8 rows)
-
-select count(*) from simple r join extremely_skewed s using (id);
- count
--------
- 20000
-(1 row)
-
-select * from hash_join_batches(
-$$
-  select count(*) from simple r join extremely_skewed s using (id);
-$$);
- original | final
-----------+-------
-        1 |     4
-(1 row)
-
-rollback to settings;
--- A couple of other hash join tests unrelated to work_mem management.
--- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-set local work_mem = '4MB';
-set local hash_mem_multiplier = 1.0;
-set local parallel_leader_participation = off;
-select * from hash_join_batches(
-$$
-  select count(*) from simple r join simple s using (id);
-$$);
- original | final
-----------+-------
-        1 |     1
-(1 row)
-
-rollback to settings;
--- Exercise rescans. We'll turn off parallel_leader_participation so
--- that we can check that instrumentation comes back correctly.
-create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
-alter table join_foo set (parallel_workers = 0);
-create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
-alter table join_bar set (parallel_workers = 2);
--- multi-batch with rescan, parallel-oblivious
-savepoint settings;
-set enable_parallel_hash = off;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '64kB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Hash
-                           ->  Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- t
-(1 row)
-
-rollback to settings;
--- single-batch with rescan, parallel-oblivious
-savepoint settings;
-set enable_parallel_hash = off;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '4MB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Hash
-                           ->  Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- f
-(1 row)
-
-rollback to settings;
--- multi-batch with rescan, parallel-aware
-savepoint settings;
-set enable_parallel_hash = on;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '64kB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Parallel Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- t
-(1 row)
-
-rollback to settings;
--- single-batch with rescan, parallel-aware
-savepoint settings;
-set enable_parallel_hash = on;
-set parallel_leader_participation = off;
-set min_parallel_table_scan_size = 0;
-set parallel_setup_cost = 0;
-set parallel_tuple_cost = 0;
-set max_parallel_workers_per_gather = 2;
-set enable_material = off;
-set enable_mergejoin = off;
-set work_mem = '4MB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-                                     QUERY PLAN
--------------------------------------------------------------------------------------
- Aggregate
-   ->  Nested Loop Left Join
-         Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1)))
-         ->  Seq Scan on join_foo
-         ->  Gather
-               Workers Planned: 2
-               ->  Parallel Hash Join
-                     Hash Cond: (b1.id = b2.id)
-                     ->  Parallel Seq Scan on join_bar b1
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on join_bar b2
-(11 rows)
-
-select count(*) from join_foo
-  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
- count
--------
-     3
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select count(*) from join_foo
-    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
-    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
-$$);
- multibatch
-------------
- f
-(1 row)
-
-rollback to settings;
--- A full outer join where every record is matched.
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-explain (costs off)
-     select count(*) from simple r full outer join simple s using (id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on simple s
-(6 rows)
-
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- parallelism not possible with parallel-oblivious full hash join
-savepoint settings;
-set enable_parallel_hash = off;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s using (id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: (r.id = s.id)
-         ->  Seq Scan on simple r
-         ->  Hash
-               ->  Seq Scan on simple s
-(6 rows)
-
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- parallelism is possible with parallel-aware full hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s using (id);
-                         QUERY PLAN
---------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Full Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on simple s
-(9 rows)
-
-select count(*) from simple r full outer join simple s using (id);
- count
--------
- 20000
-(1 row)
-
-rollback to settings;
--- A full outer join where every record is not matched.
--- non-parallel
-savepoint settings;
-set local max_parallel_workers_per_gather = 0;
-explain (costs off)
-     select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: ((0 - s.id) = r.id)
-         ->  Seq Scan on simple s
-         ->  Hash
-               ->  Seq Scan on simple r
-(6 rows)
-
-select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
- count
--------
- 40000
-(1 row)
-
-rollback to settings;
--- parallelism not possible with parallel-oblivious full hash join
-savepoint settings;
-set enable_parallel_hash = off;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
-               QUERY PLAN
-----------------------------------------
- Aggregate
-   ->  Hash Full Join
-         Hash Cond: ((0 - s.id) = r.id)
-         ->  Seq Scan on simple s
-         ->  Hash
-               ->  Seq Scan on simple r
-(6 rows)
-
-select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
- count
--------
- 40000
-(1 row)
-
-rollback to settings;
--- parallelism is possible with parallel-aware full hash join
-savepoint settings;
-set local max_parallel_workers_per_gather = 2;
-explain (costs off)
-     select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
-                         QUERY PLAN
---------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Full Join
-                     Hash Cond: ((0 - s.id) = r.id)
-                     ->  Parallel Seq Scan on simple s
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on simple r
-(9 rows)
-
-select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
- count
--------
- 40000
-(1 row)
-
-rollback to settings;
--- exercise special code paths for huge tuples (note use of non-strict
--- expression and left join required to get the detoasted tuple into
--- the hash table)
--- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
--- sts_puttuple oversized tuple cases because it's multi-batch)
-savepoint settings;
-set max_parallel_workers_per_gather = 2;
-set enable_parallel_hash = on;
-set work_mem = '128kB';
-set hash_mem_multiplier = 1.0;
-explain (costs off)
-  select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
-                           QUERY PLAN
------------------------------------------------------------------
- Finalize Aggregate
-   ->  Gather
-         Workers Planned: 2
-         ->  Partial Aggregate
-               ->  Parallel Hash Left Join
-                     Hash Cond: (wide.id = wide_1.id)
-                     ->  Parallel Seq Scan on wide
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on wide wide_1
-(9 rows)
-
-select length(max(s.t))
-from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
- length
---------
- 320000
-(1 row)
-
-select final > 1 as multibatch
-  from hash_join_batches(
-$$
-  select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
-$$);
- multibatch
-------------
- t
-(1 row)
-
-rollback to settings;
--- Hash join reuses the HOT status bit to indicate match status. This can only
--- be guaranteed to produce correct results if all the hash join tuple match
--- bits are reset before reuse. This is done upon loading them into the
--- hashtable.
-SAVEPOINT settings;
-SET enable_parallel_hash = on;
-SET min_parallel_table_scan_size = 0;
-SET parallel_setup_cost = 0;
-SET parallel_tuple_cost = 0;
-CREATE TABLE hjtest_matchbits_t1(id int);
-CREATE TABLE hjtest_matchbits_t2(id int);
-INSERT INTO hjtest_matchbits_t1 VALUES (1);
-INSERT INTO hjtest_matchbits_t2 VALUES (2);
--- Update should create a HOT tuple. If this status bit isn't cleared, we won't
--- correctly emit the NULL-extended unmatching tuple in full hash join.
-UPDATE hjtest_matchbits_t2 set id = 2;
-SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id
-  ORDER BY t1.id;
- id | id
-----+----
-  1 |
-    |  2
-(2 rows)
-
--- Test serial full hash join.
--- Resetting parallel_setup_cost should force a serial plan.
--- Just to be safe, however, set enable_parallel_hash to off, as parallel full
--- hash joins are only supported with shared hashtables.
-RESET parallel_setup_cost;
-SET enable_parallel_hash = off;
-SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
- id | id
-----+----
-  1 |
-    |  2
-(2 rows)
-
-ROLLBACK TO settings;
-rollback;
--- Verify that hash key expressions reference the correct
--- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
--- need to reference Hash's outer plan (which is below HashJoin's
--- inner plan). It's not trivial to verify that the references are
--- correct (we don't display the hashkeys themselves), but if the
--- hashkeys contain subplan references, those will be displayed. Force
--- subplans to appear just about everywhere.
---
--- Bug report:
--- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
---
-BEGIN;
-SET LOCAL enable_sort = OFF; -- avoid mergejoins
-SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order
-CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
-CREATE TABLE hjtest_2 (a bool, id int, b text, c int);
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
-INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;
-EXPLAIN (COSTS OFF, VERBOSE)
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_1, hjtest_2
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-                                           QUERY PLAN
-------------------------------------------------------------------------------------------------
- Hash Join
-   Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
-   Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3)))
-   Join Filter: (hjtest_1.a <> hjtest_2.b)
-   ->  Seq Scan on public.hjtest_1
-         Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-         Filter: ((SubPlan 4) < 50)
-         SubPlan 4
-           ->  Result
-                 Output: (hjtest_1.b * 5)
-   ->  Hash
-         Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-         ->  Seq Scan on public.hjtest_2
-               Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-               Filter: ((SubPlan 5) < 55)
-               SubPlan 5
-                 ->  Result
-                       Output: (hjtest_2.c * 5)
-   SubPlan 1
-     ->  Result
-           Output: 1
-           One-Time Filter: (hjtest_2.id = 1)
-   SubPlan 3
-     ->  Result
-           Output: (hjtest_2.c * 5)
-   SubPlan 2
-     ->  Result
-           Output: (hjtest_1.b * 5)
-(28 rows)
-
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_1, hjtest_2
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-  a1  | a2 |    t1    |    t2
-------+----+----------+----------
- text | t  | hjtest_1 | hjtest_2
-(1 row)
-
-EXPLAIN (COSTS OFF, VERBOSE)
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_2, hjtest_1
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-                                           QUERY PLAN
-------------------------------------------------------------------------------------------------
- Hash Join
-   Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass
-   Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
-   Join Filter: (hjtest_1.a <> hjtest_2.b)
-   ->  Seq Scan on public.hjtest_2
-         Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
-         Filter: ((SubPlan 5) < 55)
-         SubPlan 5
-           ->  Result
-                 Output: (hjtest_2.c * 5)
-   ->  Hash
-         Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-         ->  Seq Scan on public.hjtest_1
-               Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b
-               Filter: ((SubPlan 4) < 50)
-               SubPlan 4
-                 ->  Result
-                       Output: (hjtest_1.b * 5)
-   SubPlan 2
-     ->  Result
-           Output: (hjtest_1.b * 5)
-   SubPlan 1
-     ->  Result
-           Output: 1
-           One-Time Filter: (hjtest_2.id = 1)
-   SubPlan 3
-     ->  Result
-           Output: (hjtest_2.c * 5)
-(28 rows)
-
-SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
-FROM hjtest_2, hjtest_1
-WHERE
-    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
-    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
-    AND (SELECT hjtest_1.b * 5) < 50
-    AND (SELECT hjtest_2.c * 5) < 55
-    AND hjtest_1.a <> hjtest_2.b;
-  a1  | a2 |    t1    |    t2
-------+----+----------+----------
- text | t  | hjtest_1 | hjtest_2
-(1 row)
-
-ROLLBACK;
--- Verify that we behave sanely when the inner hash keys contain parameters
--- (that is, outer or lateral references). This situation has to defeat
--- re-use of the inner hash table across rescans.
-begin;
-set local enable_hashjoin = on;
-explain (costs off)
-select i8.q2, ss.* from
-int8_tbl i8,
-lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
-         on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
-                         QUERY PLAN
------------------------------------------------------------
- Nested Loop
-   ->  Seq Scan on int8_tbl i8
-   ->  Sort
-         Sort Key: t1.fivethous, i4.f1
-         ->  Hash Join
-               Hash Cond: (t1.fivethous = (i4.f1 + i8.q2))
-               ->  Seq Scan on tenk1 t1
-               ->  Hash
-                     ->  Seq Scan on int4_tbl i4
-(9 rows)
-
-select i8.q2, ss.* from
-int8_tbl i8,
-lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
-         on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
- q2  | fivethous | f1
------+-----------+----
- 456 |       456 |  0
- 456 |       456 |  0
- 123 |       123 |  0
- 123 |       123 |  0
-(4 rows)
-
-rollback;
+psql: error: connection to server on socket "/tmp/pg_regress-KayiD1/.s.PGSQL.58928" failed: No such file or directory
+	Is the server running locally and accepting connections on that socket?