36
loading...
This website collects cookies to deliver better user experience
yugabyte=# \c yugabyte yugabyte
psql (15devel, server 11.2-YB-2.9.1.0-b0)
You are now connected to database "yugabyte" as user "yugabyte".
yugabyte=# create table messages_binary (
"timestamp" text,
"group_id" uuid,
"other_column" int,
primary key("timestamp","group_id")
);
CREATE TABLE
yugabyte=# EXPLAIN SELECT * FROM messages_binary
WHERE group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'
AND timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e';
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
------------------
Seq Scan on messages_binary (cost=0.00..105.00 rows=1000 width=52)
Filter: (("timestamp" > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e'::text) AND (group_id = '983d5259-97ff-49e3-8829-101a
b8dead92'::uuid))
(2 rows)
yugabyte=# create extension pgcrypto;
CREATE EXTENSION
yugabyte=# insert into messages_binary
with groups as (
select gen_random_uuid() group_id from generate_series(1,3)
)
select
to_char(now()+(generate_series(1,3)*interval'1 second')
,'yyyy-mm-ddThh24:mi:ss.000Z-')
||substr(gen_random_uuid()::text,25) "timestamp"
,group_id, 42 as "value"
from groups;
INSERT 0 9
yugabyte=# select * from messages_binary;
timestamp | group_id | other_column
--------------------------------------------+--------------------------------------+--------------
2021-11-07T20:00:23.000Z-c533a5e5623e | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:24.000Z-b879daca6cb7 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:23.000Z-ca98dd4de397 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:22.000Z-c440295c4500 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:24.000Z-631b45e66aba | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:22.000Z-ad01842bb691 | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:24.000Z-90342717a0c8 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:22.000Z-933f552d0159 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:23.000Z-1dcde16fc472 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
(9 rows)
("timestamp","group_id")
. And we can see that the rows I need for group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e
are scattered in this Seq Scan result.yugabyte=# select * from messages_binary
order by "timestamp","group_id";
timestamp | group_id | other_column
--------------------------------------------+--------------------------------------+--------------
2021-11-07T20:00:22.000Z-933f552d0159 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:22.000Z-ad01842bb691 | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:22.000Z-c440295c4500 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:23.000Z-1dcde16fc472 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:23.000Z-c533a5e5623e | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:23.000Z-ca98dd4de397 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:24.000Z-631b45e66aba | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:24.000Z-90342717a0c8 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:24.000Z-b879daca6cb7 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
(9 rows)
WHERE group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e' AND timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e'
predicate. We start on the first row because it verifies timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e'
and from this we have to scan all rows and filter them afterwards. There is no data structure where the interesting rows can be found in a small range that can be read alone. This explains the Seq Scan.yugabyte=# select * from messages_binary
order by "group_id","timestamp";
timestamp | group_id | other_column
--------------------------------------------+--------------------------------------+--------------
2021-11-07T20:00:22.000Z-c440295c4500 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:23.000Z-1dcde16fc472 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:24.000Z-90342717a0c8 | 9c3d61e1-6d3f-4b95-9e08-46f485d10b75 | 42
2021-11-07T20:00:22.000Z-ad01842bb691 | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:23.000Z-c533a5e5623e | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:24.000Z-631b45e66aba | e7e46753-2e99-4ee4-b77f-17136b01790e | 42
2021-11-07T20:00:22.000Z-933f552d0159 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:23.000Z-ca98dd4de397 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
2021-11-07T20:00:24.000Z-b879daca6cb7 | f27ac68f-2a10-46f0-a8fe-77b99c0c5a66 | 42
(9 rows)
group_id='e7e46753-2e99-4ee4-b77f-17136b01790e'
, timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e'
, group_id='e7e46753-2e99-4ee4-b77f-17136b01790e'
. yugabyte=# create index messages_binary_key2
on messages_binary ("group_id","timestamp");
CREATE INDEX
yugabyte=# EXPLAIN SELECT * FROM messages_binary
WHERE group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'
AND timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e';
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
---------------------------
Index Scan using messages_binary_key2 on messages_binary (cost=0.00..5.25 rows=10 width=52)
Index Cond: ((group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'::uuid) AND ("timestamp" > '1970-01-01T00:00:00.000Z-0000-ae26
b84edae7349e'::text))
(2 rows)
yugabyte=# create index messages_binary_key2
on messages_binary ("group_id","timestamp")
include ("other_column");
CREATE INDEX
yugabyte=# EXPLAIN SELECT * FROM messages_binary
WHERE group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'
AND timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e';
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
---------------------------
Index Only Scan using messages_binary_key2 on messages_binary (cost=0.00..5.15 rows=10 width=52)
Index Cond: ((group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'::uuid) AND ("timestamp" > '1970-01-01T00:00:00.000Z-0000-ae26
b84edae7349e'::text))
(2 rows)
timestamp
only without the group_id
. yugabyte=# create table messages_binary (
"timestamp" text,
"group_id" uuid,
"other_column" int,
primary key("group_id","timestamp")
);
CREATE TABLE
yugabyte=# insert into messages_binary
with groups as (
select gen_random_uuid() group_id from generate_series(1,1e3)
)
select
to_char(now()+(generate_series(1,1e4)*interval'1 second')
,'yyyy-mm-ddThh24:mi:ss.000Z-')
||substr(gen_random_uuid()::text,25) "timestamp"
,group_id, 42 as "value"
from groups;
yugabyte=# analyze messages_binary;
ANALYZE
yugabyte=# EXPLAIN (analyze)
SELECT * FROM messages_binary
WHERE group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'
AND timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e';
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
---------------------------
Index Scan using messages_binary_pkey on messages_binary (cost=0.00..1214.95 rows=10530 width=52) (actual time=10.588..100.838
rows=10000 loops=1)
Index Cond: ((group_id = 'e7e46753-2e99-4ee4-b77f-17136b01790e'::uuid) AND ("timestamp" > '1970-01-01T00:00:00.000Z-0000-ae26
b84edae7349e'::text))
Planning Time: 0.067 ms
Execution Time: 101.711 ms
(4 rows)
postgres=# truncate table messages_binary;
TRUNCATE TABLE
postgres=# insert into messages_binary
with groups as (
select gen_random_uuid() group_id from generate_series(1,1e3)
), timestamps as (
select to_char(now()+(generate_series(1,1e4)*interval'1 second')
,'yyyy-mm-ddThh24:mi:ss.000Z-')
||substr(gen_random_uuid()::text,25) "timestamp"
)
select
"timestamp",group_id, 42 as "value"
from timestamps,groups;
INSERT 0 10000000
postgres=# select * from messages_binary limit 1;
timestamp | group_id | other_column
--------------------------------------------+--------------------------------------+--------------
2021-11-08T08:34:14.000Z-6bb27dbe2723 | 91ee7381-eb92-48cd-bb82-9ed939dc3a13 | 42
(1 row)
postgres=# EXPLAIN (analyze,buffers)
SELECT * FROM messages_binary
WHERE group_id = '91ee7381-eb92-48cd-bb82-9ed939dc3a13'
AND timestamp > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on messages_binary (cost=778.61..30792.78 rows=9956 width=58) (actual time=3.021..43.141 rows=10000 loops=1)
Recheck Cond: ((group_id = '91ee7381-eb92-48cd-bb82-9ed939dc3a13'::uuid) AND ("timestamp" > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e'::text))
Heap Blocks: exact=10000
Buffers: shared hit=2006 read=8167 written=2249
-> Bitmap Index Scan on messages_binary_pkey (cost=0.00..776.12 rows=9956 width=0) (actual time=1.897..1.897 rows=10000 loops=1)
Index Cond: ((group_id = '91ee7381-eb92-48cd-bb82-9ed939dc3a13'::uuid) AND ("timestamp" > '1970-01-01T00:00:00.000Z-0000-ae26b84edae7349e'::text))
Buffers: shared hit=11 read=162
Planning Time: 0.086 ms
Execution Time: 43.832 ms
(group_id = '983d5259-97ff-49e3-8829-101ab8dead92'::text)
in the Index Conditions. Storing UUID as TXT is not efficient, I've used the uuid datatype here:yugabyte=# select pg_column_size(
'e7e46753-2e99-4ee4-b77f-17136b01790e'::uuid);
pg_column_size
---------------------
16
(1 row)
yugabyte=# select pg_column_size(
'e7e46753-2e99-4ee4-b77f-17136b01790e'::text);
pg_column_size
---------------------
40
(1 row)