October 22, 2024
Chicago 12, Melborne City, USA
SQL

Postgres is using wrong index


  1. Create 2 identical tables:
CREATE TABLE logs1 (
    user_id int4 NOT NULL,
    create_time timestamptz NOT NULL DEFAULT now()
);

CREATE TABLE logs2 (
    user_id int4 NOT NULL,
    create_time timestamptz NOT NULL DEFAULT now()
);
  1. I insert 200,000,000 entries into the first table, 2,000,000 for each of the 100 users:
INSERT INTO public.logs1 (user_id) (select 1 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 2 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 3 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 4 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 5 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 6 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 7 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 8 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 9 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 10 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 11 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 12 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 13 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 14 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 15 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 16 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 17 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 18 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 19 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 20 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 21 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 22 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 23 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 24 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 25 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 26 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 27 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 28 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 29 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 30 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 31 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 32 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 33 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 34 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 35 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 36 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 37 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 38 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 39 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 40 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 41 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 42 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 43 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 44 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 45 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 46 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 47 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 48 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 49 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 50 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 51 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 52 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 53 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 54 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 55 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 56 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 57 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 58 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 59 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 60 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 61 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 62 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 63 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 64 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 65 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 66 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 67 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 68 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 69 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 70 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 71 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 72 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 73 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 74 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 75 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 76 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 77 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 78 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 79 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 80 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 81 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 82 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 83 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 84 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 85 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 86 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 87 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 88 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 89 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 90 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 91 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 92 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 93 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 94 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 95 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 96 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 97 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 98 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 99 from generate_series(1,2000000));
INSERT INTO public.logs1 (user_id) (select 100 from generate_series(1,2000000));
  1. Insert 200,000,000 entries into the second table, but in such a way that the data of several users takes up most of the total data:
INSERT INTO public.logs2 (user_id) (select 1 from generate_series(1,95802970));
INSERT INTO public.logs2 (user_id) (select 2 from generate_series(1,10561980));
INSERT INTO public.logs2 (user_id) (select 3 from generate_series(1,17014900));
INSERT INTO public.logs2 (user_id) (select 4 from generate_series(1,14987000));
INSERT INTO public.logs2 (user_id) (select 5 from generate_series(1,14701010));
INSERT INTO public.logs2 (user_id) (select 6 from generate_series(1,13137950));
INSERT INTO public.logs2 (user_id) (select 7 from generate_series(1,7180130));
INSERT INTO public.logs2 (user_id) (select 8 from generate_series(1,6930550));
INSERT INTO public.logs2 (user_id) (select 9 from generate_series(1,4260890));
INSERT INTO public.logs2 (user_id) (select 10 from generate_series(1,3702960));
INSERT INTO public.logs2 (user_id) (select 11 from generate_series(1,3215300));
INSERT INTO public.logs2 (user_id) (select 12 from generate_series(1,2648260));
INSERT INTO public.logs2 (user_id) (select 13 from generate_series(1,1197320));
INSERT INTO public.logs2 (user_id) (select 14 from generate_series(1,802200));
INSERT INTO public.logs2 (user_id) (select 15 from generate_series(1,709840));
INSERT INTO public.logs2 (user_id) (select 16 from generate_series(1,661650));
INSERT INTO public.logs2 (user_id) (select 17 from generate_series(1,628120));
INSERT INTO public.logs2 (user_id) (select 18 from generate_series(1,590780));
INSERT INTO public.logs2 (user_id) (select 19 from generate_series(1,433570));
INSERT INTO public.logs2 (user_id) (select 20 from generate_series(1,291460));
INSERT INTO public.logs2 (user_id) (select 21 from generate_series(1,195000));
INSERT INTO public.logs2 (user_id) (select 22 from generate_series(1,158450));
INSERT INTO public.logs2 (user_id) (select 23 from generate_series(1,55980));
INSERT INTO public.logs2 (user_id) (select 24 from generate_series(1,34210));
INSERT INTO public.logs2 (user_id) (select 25 from generate_series(1,21660));
INSERT INTO public.logs2 (user_id) (select 26 from generate_series(1,17400));
INSERT INTO public.logs2 (user_id) (select 27 from generate_series(1,9500));
INSERT INTO public.logs2 (user_id) (select 28 from generate_series(1,7850));
INSERT INTO public.logs2 (user_id) (select 29 from generate_series(1,7450));
INSERT INTO public.logs2 (user_id) (select 30 from generate_series(1,4910));
INSERT INTO public.logs2 (user_id) (select 31 from generate_series(1,4860));
INSERT INTO public.logs2 (user_id) (select 32 from generate_series(1,3150));
INSERT INTO public.logs2 (user_id) (select 33 from generate_series(1,2760));
INSERT INTO public.logs2 (user_id) (select 34 from generate_series(1,2620));
INSERT INTO public.logs2 (user_id) (select 35 from generate_series(1,2160));
INSERT INTO public.logs2 (user_id) (select 36 from generate_series(1,1810));
INSERT INTO public.logs2 (user_id) (select 37 from generate_series(1,1630));
INSERT INTO public.logs2 (user_id) (select 38 from generate_series(1,1410));
INSERT INTO public.logs2 (user_id) (select 39 from generate_series(1,1090));
INSERT INTO public.logs2 (user_id) (select 40 from generate_series(1,1050));
INSERT INTO public.logs2 (user_id) (select 41 from generate_series(1,830));
INSERT INTO public.logs2 (user_id) (select 42 from generate_series(1,610));
INSERT INTO public.logs2 (user_id) (select 43 from generate_series(1,560));
INSERT INTO public.logs2 (user_id) (select 44 from generate_series(1,540));
INSERT INTO public.logs2 (user_id) (select 45 from generate_series(1,500));
INSERT INTO public.logs2 (user_id) (select 46 from generate_series(1,490));
INSERT INTO public.logs2 (user_id) (select 47 from generate_series(1,330));
INSERT INTO public.logs2 (user_id) (select 48 from generate_series(1,240));
INSERT INTO public.logs2 (user_id) (select 49 from generate_series(1,210));
INSERT INTO public.logs2 (user_id) (select 50 from generate_series(1,160));
INSERT INTO public.logs2 (user_id) (select 51 from generate_series(1,130));
INSERT INTO public.logs2 (user_id) (select 52 from generate_series(1,110));
INSERT INTO public.logs2 (user_id) (select 53 from generate_series(1,100));
INSERT INTO public.logs2 (user_id) (select 54 from generate_series(1,100));
INSERT INTO public.logs2 (user_id) (select 55 from generate_series(1,90));
INSERT INTO public.logs2 (user_id) (select 56 from generate_series(1,90));
INSERT INTO public.logs2 (user_id) (select 57 from generate_series(1,90));
INSERT INTO public.logs2 (user_id) (select 58 from generate_series(1,80));
INSERT INTO public.logs2 (user_id) (select 59 from generate_series(1,80));
INSERT INTO public.logs2 (user_id) (select 60 from generate_series(1,70));
INSERT INTO public.logs2 (user_id) (select 61 from generate_series(1,70));
INSERT INTO public.logs2 (user_id) (select 62 from generate_series(1,70));
INSERT INTO public.logs2 (user_id) (select 63 from generate_series(1,60));
INSERT INTO public.logs2 (user_id) (select 64 from generate_series(1,40));
INSERT INTO public.logs2 (user_id) (select 65 from generate_series(1,40));
INSERT INTO public.logs2 (user_id) (select 66 from generate_series(1,40));
INSERT INTO public.logs2 (user_id) (select 67 from generate_series(1,30));
INSERT INTO public.logs2 (user_id) (select 68 from generate_series(1,30));
INSERT INTO public.logs2 (user_id) (select 69 from generate_series(1,30));
INSERT INTO public.logs2 (user_id) (select 70 from generate_series(1,30));
INSERT INTO public.logs2 (user_id) (select 71 from generate_series(1,30));
INSERT INTO public.logs2 (user_id) (select 72 from generate_series(1,20));
INSERT INTO public.logs2 (user_id) (select 73 from generate_series(1,20));
INSERT INTO public.logs2 (user_id) (select 74 from generate_series(1,20));
INSERT INTO public.logs2 (user_id) (select 75 from generate_series(1,20));
INSERT INTO public.logs2 (user_id) (select 76 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 77 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 78 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 79 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 80 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 81 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 82 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 83 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 84 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 85 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 86 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 87 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 88 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 89 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 90 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 91 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 92 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 93 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 94 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 95 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 96 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 97 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 98 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 99 from generate_series(1,10));
INSERT INTO public.logs2 (user_id) (select 100 from generate_series(1,10));
  1. Create identical indexes:
CREATE INDEX logs1_user_id_idx ON ONLY logs1 USING btree (user_id);
CREATE INDEX logs1_create_time_idx ON ONLY logs1 USING btree (create_time);
CREATE INDEX logs1_user_id_create_time_idx ON ONLY logs1 USING btree (user_id, create_time);

CREATE INDEX logs2_user_id_idx ON ONLY logs2 USING btree (user_id);
CREATE INDEX logs2_create_time_idx ON ONLY logs2 USING btree (create_time);
CREATE INDEX logs2_user_id_create_time_idx ON ONLY logs2 USING btree (user_id, create_time);
  1. Run analyze:
analyze logs1;
analyze logs2;

As a result, I have 2 tables with the same schemas, indexes, the same amount of data, but with a different distribution of this data.

I ran this query on both tables:

EXPLAIN ANALYZE
SELECT *
FROM logs1 -- logs2
WHERE user_id = 1001 
ORDER BY create_time  DESC
OFFSET 0 ROWS 
FETCH FIRST 10 ROWS only

And here is the result:

--------- logs1 ---------
Limit  (cost=0.57..8.59 rows=1 width=12) (actual time=0.026..0.028 rows=0 loops=1)
  ->  Index Only Scan Backward using logs1_user_id_create_time_idx on logs1  (cost=0.57..8.59 rows=1 width=12) (actual time=0.024..0.024 rows=0 loops=1)
        Index Cond: (user_id = 1001)
        Heap Fetches: 0
Planning Time: 0.447 ms
Execution Time: 0.062 ms

--------- logs2 ---------
Limit  (cost=0.57..0.78 rows=10 width=12) (actual time=0.042..0.044 rows=0 loops=1)
  ->  Index Only Scan Backward using logs2_user_id_create_time_idx on logs2  (cost=0.57..414.57 rows=20000 width=12) (actual time=0.039..0.040 rows=0 loops=1)
        Index Cond: (user_id = 1001)
        Heap Fetches: 0
Planning Time: 0.220 ms
Execution Time: 0.080 ms

In both cases the correct index was used – logs1_user_id_create_time_idx, which caused the query to execute quickly. Well, now I run this query:

EXPLAIN ANALYZE
SELECT *
FROM logs1 -- logs2
WHERE user_id IN (1001, 1002)
ORDER BY create_time  DESC
OFFSET 0 ROWS 
FETCH FIRST 10 ROWS only

And here is the result:

--------- logs1 ---------
Limit  (cost=13.18..13.18 rows=1 width=12) (actual time=0.086..0.087 rows=0 loops=1)
  ->  Sort  (cost=13.18..13.18 rows=1 width=12) (actual time=0.083..0.084 rows=0 loops=1)
        Sort Key: create_time DESC
        Sort Method: quicksort  Memory: 25kB
        ->  Index Only Scan using logs1_user_id_create_time_idx on logs1  (cost=0.57..13.17 rows=1 width=12) (actual time=0.074..0.074 rows=0 loops=1)
              Index Cond: (user_id = ANY ('{1001,1002}'::integer[]))
              Heap Fetches: 0
Planning Time: 0.223 ms
Execution Time: 0.124 ms

--------- logs2 ---------
Limit  (cost=0.57..1303.11 rows=10 width=12) (actual time=254657.997..254658.003 rows=0 loops=1)
  ->  Index Scan Backward using logs2_create_time_idx on logs2  (cost=0.57..5210160.65 rows=40000 width=12) (actual time=254657.992..254657.993 rows=0 loops=1)
        Filter: (user_id = ANY ('{1001,1002}'::integer[]))
        Rows Removed by Filter: 200000000
Planning Time: 0.247 ms
Execution Time: 254658.069 ms

In case of logs1, the query executed quickly because the same index was used as in the previous query.

In logs2, the query executed 2 million times longer (over 4 minutes) because a different index was used – logs2_create_time_idx. If I remove this index: logs2_create_time_idx, then postgres will come to its senses and use logs2_user_id_create_time_idx, causing the query to execute quickly:

Limit  (cost=9.18..9.18 rows=1 width=12) (actual time=0.143..0.146 rows=0 loops=1)
  ->  Sort  (cost=9.18..9.18 rows=1 width=12) (actual time=0.141..0.143 rows=0 loops=1)
        Sort Key: create_time DESC
        Sort Method: quicksort  Memory: 25kB
        ->  Index Only Scan using logs1_user_id_create_time_idx on logs1  (cost=0.57..9.17 rows=1 width=12) (actual time=0.122..0.123 rows=0 loops=1)
              Index Cond: (user_id = ANY ('{1001,1002}'::integer[]))
              Heap Fetches: 0
Planning Time: 0.245 ms
Execution Time: 0.188 ms

I am in a situation where postgres selects the wrong index for me, which is why the query takes incomparably longer to execute than if the correct index had been selected. What can I do about it? In sql server I could force the engine to use a specific index, but in pogres, as far as I know, this cannot be done.

I will also add that postgres uses the correct index when I operate on a smaller amount of data, e.g. not 200,000,000, but 2,000,000.



You need to sign in to view this answers

Leave feedback about this

  • Quality
  • Price
  • Service

PROS

+
Add Field

CONS

+
Add Field
Choose Image
Choose Video