如何解释这个执行计划(IN vs EXISTS)?
从执行计划来看,IN 版本比 EXISTS 版本更快
我认为 EXISTS 查询更快,因为它急切地检查条件。 IN查询虽然看起来很直观,但我觉得它似乎很晚才解决最终条件的结果;也就是说,从内到外,我认为 IN 较慢,因为下一个条件需要等待其内部条件的结果。
虽然从下面的执行计划来看,IN 更快;它的成本较低。
我的一部分想法是想使用 IN 版本,对于下一个维护者来说它看起来简单直观,下面的执行计划似乎表明它比 EXISTS 更快。但我的另一部分是想使用 EXISTS,因为我只是觉得它更快,但下面的执行计划似乎与这种看法相矛盾。
下面的两个查询哪一个更快,是 IN 版本还是 EXISTS 版本?
IN 版本:
explain analyze
select ceil(
( count(distinct company_rec_id)::numeric + 1 )
/ ((1)) )::int
from
parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
(((E'')) <> '' and to_tsvector(c.company) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((0)) <> 0 and ord.parcel_number = ((0)))
or ((0)) = 0
)
and parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from parcel_application_shoe
where
(
(((E'')) <> '' and to_tsvector(extract_words(shoe_description)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(order_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(style_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
)
)
"Aggregate (cost=1060.73..1060.75 rows=1 width=37) (actual time=29.028..29.028 rows=1 loops=1)"
" -> Hash Join (cost=880.43..1053.04 rows=3074 width=37) (actual time=13.261..16.365 rows=3074 loops=1)"
" Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
" -> Hash Join (cost=864.79..995.14 rows=3074 width=37) (actual time=13.115..15.027 rows=3074 loops=1)"
" Hash Cond: ((ord.parcel_rec_id)::text = (parcel_application.parcel_rec_id)::text)"
" -> Seq Scan on parcel ord (cost=0.00..78.87 rows=3087 width=74) (actual time=0.005..0.373 rows=3087 loops=1)"
" -> Hash (cost=826.37..826.37 rows=3074 width=37) (actual time=13.102..13.102 rows=3074 loops=1)"
" -> HashAggregate (cost=795.63..826.37 rows=3074 width=37) (actual time=11.835..12.281 rows=3074 loops=1)"
" -> Hash Join (cost=541.34..787.59 rows=3218 width=37) (actual time=7.076..10.286 rows=3218 loops=1)"
" Hash Cond: ((parcel_application.parcel_application_rec_id)::text = (parcel_application_shoe.parcel_application_rec_id)::text)"
" -> Seq Scan on parcel_application (cost=0.00..122.18 rows=3218 width=74) (actual time=0.004..0.710 rows=3218 loops=1)"
" -> Hash (cost=501.12..501.12 rows=3218 width=37) (actual time=7.061..7.061 rows=3218 loops=1)"
" -> HashAggregate (cost=468.94..501.12 rows=3218 width=37) (actual time=5.721..6.220 rows=3218 loops=1)"
" -> Seq Scan on parcel_application_shoe (cost=0.00..442.95 rows=10395 width=37) (actual time=0.004..2.318 rows=10395 loops=1)"
" -> Hash (cost=11.95..11.95 rows=295 width=37) (actual time=0.136..0.136 rows=295 loops=1)"
" -> Seq Scan on company c (cost=0.00..11.95 rows=295 width=37) (actual time=0.013..0.068 rows=295 loops=1)"
"Total runtime: 29.122 ms"
EXISTS 版本:
explain analyze
select ceil(
( count(distinct company_rec_id)::numeric + 1 )
/ ((1)) )::int from
parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
(((E'')) <> '' and to_tsvector(c.company) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((0)) <> 0 and ord.parcel_number = ((0)))
or ((0)) = 0
)
and exists
(
select * from parcel_application pa
where pa.parcel_rec_id = ord.parcel_rec_id
and
exists
(
select * from parcel_application_shoe ord_item
where
ord_item.parcel_application_rec_id = pa.parcel_application_rec_id
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.shoe_description)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.order_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.style_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
)
)
"Aggregate (cost=17773.13..17773.16 rows=1 width=37) (actual time=34.519..34.520 rows=1 loops=1)"
" -> Hash Join (cost=17636.59..17769.11 rows=1609 width=37) (actual time=17.174..20.426 rows=3074 loops=1)"
" Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
" -> Hash Join (cost=17620.95..17731.35 rows=1609 width=37) (actual time=16.882..18.862 rows=3074 loops=1)"
" Hash Cond: ((ord.parcel_rec_id)::text = (pa.parcel_rec_id)::text)"
" -> Seq Scan on parcel ord (cost=0.00..78.87 rows=3087 width=74) (actual time=0.006..0.409 rows=3087 loops=1)"
" -> Hash (cost=17601.74..17601.74 rows=1537 width=37) (actual time=16.858..16.858 rows=3074 loops=1)"
" -> HashAggregate (cost=17586.37..17601.74 rows=1537 width=37) (actual time=15.015..15.535 rows=3074 loops=1)"
" -> Seq Scan on parcel_application pa (cost=0.00..17582.35 rows=1609 width=37) (actual time=10.040..12.440 rows=3218 loops=1)"
" Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)"
" SubPlan 1"
" -> Index Scan using fki_parcel_application_shoe__parcel_application on parcel_application_shoe ord_item (cost=0.00..16.28 rows=3 width=0) (never executed)"
" Index Cond: ((parcel_application_rec_id)::text = ($0)::text)"
" SubPlan 2"
" -> Seq Scan on parcel_application_shoe ord_item (cost=0.00..442.95 rows=10395 width=37) (actual time=0.005..4.482 rows=10395 loops=1)"
" -> Hash (cost=11.95..11.95 rows=295 width=37) (actual time=0.284..0.284 rows=295 loops=1)"
" -> Seq Scan on company c (cost=0.00..11.95 rows=295 width=37) (actual time=0.010..0.155 rows=295 loops=1)"
"Total runtime: 34.645 ms"
这是我最终使用的最终查询,IN 版本。我只能感知到之间的边际性能差异 IN 和 EXISTS 版本。我更多地考虑下一个维护者的可读性,因此我使用了 IN
select ord.parcel_number, ord.received_date, c.company
from parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
:_company = ''
or
to_tsvector(c.company) @@ plainto_tsquery(extract_words(:_company))
)
and
(
:_fttsc_num = 0
or ord.parcel_number = :_fttsc_num
)
and
(
(:_item = '' and :_order_num = '' and :_style_num = '')
or
ord.parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from parcel_application_shoe
where
(
:_item = ''
or to_tsvector(extract_words(shoe_description)) @@ plainto_tsquery(extract_words(:_item))
)
and
(
:_order_num = ''
or to_tsvector(extract_words(order_number)) @@ plainto_tsquery(extract_words(:_order_num))
)
and
(
:_style_num = ''
or to_tsvector(extract_words(style_number)) @@ plainto_tsquery(extract_words(:_style_num))
)
) -- parcel_application_rec_id IN
) -- parcel_rec_id IN
)
and
-- material filter...
(
(:_material = '')
or
-- implied material <> ''
parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from mlist
where mlist_rec_id in
(
select mlist_rec_id
from mlist_detail
join material m using(material_rec_id)
where to_tsvector(extract_words(m.material)) @@ plainto_tsquery(extract_words(:_material))
)
)
)
)
-- ...material filter
and
-- parameter filter...
(
(:_parameter = '')
or
-- implied parameter <> ''
parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from mlist
where mlist_rec_id in
(
select mlist_rec_id
from mlist_detail
where mlist_detail_rec_id in
(
select mlist_detail_rec_id
from mlist_detail_parameter
join parameter p using(parameter_rec_id)
where to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words(:_parameter))
)
)
)
)
)
-- ...parameter filter
order by ord.received_date
It looks like from the execution plan the IN version is faster than EXISTS version
I'm thinking that EXISTS query is faster, for it eagerly checks the conditions. The IN query though it looks intuitive, I feel that it seems it resolves the final conditions' result very late; that is, from inside out, I perceived that IN are slower because the next condition need to wait for the result of its inner conditions.
Though from the look of the execution plans below, the IN is faster; it has lower cost.
Part of me is wanting to use the IN version, it looks easy and intuitive for the next maintainer, and the execution plan below seems to indicate that it is faster than EXISTS. But another part of me is wanting to use the EXISTS, because I just feel it is faster, but the execution plan below seems to contradict that perception.
Which of the two queries below is faster, is it the IN version or the EXISTS version?
IN version:
explain analyze
select ceil(
( count(distinct company_rec_id)::numeric + 1 )
/ ((1)) )::int
from
parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
(((E'')) <> '' and to_tsvector(c.company) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((0)) <> 0 and ord.parcel_number = ((0)))
or ((0)) = 0
)
and parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from parcel_application_shoe
where
(
(((E'')) <> '' and to_tsvector(extract_words(shoe_description)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(order_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(style_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
)
)
"Aggregate (cost=1060.73..1060.75 rows=1 width=37) (actual time=29.028..29.028 rows=1 loops=1)"
" -> Hash Join (cost=880.43..1053.04 rows=3074 width=37) (actual time=13.261..16.365 rows=3074 loops=1)"
" Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
" -> Hash Join (cost=864.79..995.14 rows=3074 width=37) (actual time=13.115..15.027 rows=3074 loops=1)"
" Hash Cond: ((ord.parcel_rec_id)::text = (parcel_application.parcel_rec_id)::text)"
" -> Seq Scan on parcel ord (cost=0.00..78.87 rows=3087 width=74) (actual time=0.005..0.373 rows=3087 loops=1)"
" -> Hash (cost=826.37..826.37 rows=3074 width=37) (actual time=13.102..13.102 rows=3074 loops=1)"
" -> HashAggregate (cost=795.63..826.37 rows=3074 width=37) (actual time=11.835..12.281 rows=3074 loops=1)"
" -> Hash Join (cost=541.34..787.59 rows=3218 width=37) (actual time=7.076..10.286 rows=3218 loops=1)"
" Hash Cond: ((parcel_application.parcel_application_rec_id)::text = (parcel_application_shoe.parcel_application_rec_id)::text)"
" -> Seq Scan on parcel_application (cost=0.00..122.18 rows=3218 width=74) (actual time=0.004..0.710 rows=3218 loops=1)"
" -> Hash (cost=501.12..501.12 rows=3218 width=37) (actual time=7.061..7.061 rows=3218 loops=1)"
" -> HashAggregate (cost=468.94..501.12 rows=3218 width=37) (actual time=5.721..6.220 rows=3218 loops=1)"
" -> Seq Scan on parcel_application_shoe (cost=0.00..442.95 rows=10395 width=37) (actual time=0.004..2.318 rows=10395 loops=1)"
" -> Hash (cost=11.95..11.95 rows=295 width=37) (actual time=0.136..0.136 rows=295 loops=1)"
" -> Seq Scan on company c (cost=0.00..11.95 rows=295 width=37) (actual time=0.013..0.068 rows=295 loops=1)"
"Total runtime: 29.122 ms"
EXISTS version:
explain analyze
select ceil(
( count(distinct company_rec_id)::numeric + 1 )
/ ((1)) )::int from
parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
(((E'')) <> '' and to_tsvector(c.company) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((0)) <> 0 and ord.parcel_number = ((0)))
or ((0)) = 0
)
and exists
(
select * from parcel_application pa
where pa.parcel_rec_id = ord.parcel_rec_id
and
exists
(
select * from parcel_application_shoe ord_item
where
ord_item.parcel_application_rec_id = pa.parcel_application_rec_id
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.shoe_description)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.order_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.style_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
)
)
"Aggregate (cost=17773.13..17773.16 rows=1 width=37) (actual time=34.519..34.520 rows=1 loops=1)"
" -> Hash Join (cost=17636.59..17769.11 rows=1609 width=37) (actual time=17.174..20.426 rows=3074 loops=1)"
" Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
" -> Hash Join (cost=17620.95..17731.35 rows=1609 width=37) (actual time=16.882..18.862 rows=3074 loops=1)"
" Hash Cond: ((ord.parcel_rec_id)::text = (pa.parcel_rec_id)::text)"
" -> Seq Scan on parcel ord (cost=0.00..78.87 rows=3087 width=74) (actual time=0.006..0.409 rows=3087 loops=1)"
" -> Hash (cost=17601.74..17601.74 rows=1537 width=37) (actual time=16.858..16.858 rows=3074 loops=1)"
" -> HashAggregate (cost=17586.37..17601.74 rows=1537 width=37) (actual time=15.015..15.535 rows=3074 loops=1)"
" -> Seq Scan on parcel_application pa (cost=0.00..17582.35 rows=1609 width=37) (actual time=10.040..12.440 rows=3218 loops=1)"
" Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)"
" SubPlan 1"
" -> Index Scan using fki_parcel_application_shoe__parcel_application on parcel_application_shoe ord_item (cost=0.00..16.28 rows=3 width=0) (never executed)"
" Index Cond: ((parcel_application_rec_id)::text = ($0)::text)"
" SubPlan 2"
" -> Seq Scan on parcel_application_shoe ord_item (cost=0.00..442.95 rows=10395 width=37) (actual time=0.005..4.482 rows=10395 loops=1)"
" -> Hash (cost=11.95..11.95 rows=295 width=37) (actual time=0.284..0.284 rows=295 loops=1)"
" -> Seq Scan on company c (cost=0.00..11.95 rows=295 width=37) (actual time=0.010..0.155 rows=295 loops=1)"
"Total runtime: 34.645 ms"
This is the final query I end up using, the IN version. I can only perceived marginal performance difference between
IN and EXISTS versions. I'm thinking more about the readability for the next maintainer, hence I used IN
select ord.parcel_number, ord.received_date, c.company
from parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
:_company = ''
or
to_tsvector(c.company) @@ plainto_tsquery(extract_words(:_company))
)
and
(
:_fttsc_num = 0
or ord.parcel_number = :_fttsc_num
)
and
(
(:_item = '' and :_order_num = '' and :_style_num = '')
or
ord.parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from parcel_application_shoe
where
(
:_item = ''
or to_tsvector(extract_words(shoe_description)) @@ plainto_tsquery(extract_words(:_item))
)
and
(
:_order_num = ''
or to_tsvector(extract_words(order_number)) @@ plainto_tsquery(extract_words(:_order_num))
)
and
(
:_style_num = ''
or to_tsvector(extract_words(style_number)) @@ plainto_tsquery(extract_words(:_style_num))
)
) -- parcel_application_rec_id IN
) -- parcel_rec_id IN
)
and
-- material filter...
(
(:_material = '')
or
-- implied material <> ''
parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from mlist
where mlist_rec_id in
(
select mlist_rec_id
from mlist_detail
join material m using(material_rec_id)
where to_tsvector(extract_words(m.material)) @@ plainto_tsquery(extract_words(:_material))
)
)
)
)
-- ...material filter
and
-- parameter filter...
(
(:_parameter = '')
or
-- implied parameter <> ''
parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from mlist
where mlist_rec_id in
(
select mlist_rec_id
from mlist_detail
where mlist_detail_rec_id in
(
select mlist_detail_rec_id
from mlist_detail_parameter
join parameter p using(parameter_rec_id)
where to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words(:_parameter))
)
)
)
)
)
-- ...parameter filter
order by ord.received_date
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
您可能想要重新检查嵌套的 EXISTS() 子查询集。 在过去使用这些似乎效果不佳因为在内连接上有一个 EXISTS。
所以一般来说,将以下内容转换
为:
You might want to re-examine that nested set of EXISTS() subqueries. In the past it appears that using those doesn't perform as well as having a single EXISTS on an inner join.
so in general, transform something like:
into