优化 CLUSTERED INDEX 以与 JOIN 一起使用
表 optin_channel_1
(每个“通道”都有一个专用表)
CREATE TABLE [dbo].[optin_channel_1](
[key_id] [bigint] NOT NULL,
[valid_to] [datetime] NOT NULL,
[valid_from] [datetime] NOT NULL,
[key_type_id] [int] NOT NULL,
[optin_flag] [tinyint] NOT NULL,
[source_proc_id] [int] NOT NULL,
[date_inserted] [datetime] NOT NULL
) ON [PRIMARY]
CREATE CLUSTERED INDEX [ix_id] ON [dbo].[optin_channel_1]
(
[key_type_id] ASC,
[key_id] ASC,
[valid_to] ASC,
[valid_from] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
表 profile_conns
CREATE TABLE [dbo].[profile_conns](
[profile_key_id] [bigint] NOT NULL,
[valid_to] [datetime] NOT NULL,
[valid_from] [datetime] NOT NULL,
[conn_key_id] [bigint] NOT NULL,
[conn_key_type_id] [int] NOT NULL,
[conn_type_id] [int] NOT NULL,
[source_proc_id] [int] NOT NULL,
[date_inserted] [datetime] NOT NULL
) ON [PRIMARY]
CREATE CLUSTERED INDEX [ix_id] ON [dbo].[profile_conns]
(
[profile_key_id] ASC,
[conn_key_type_id] ASC,
[conn_key_id] ASC,
[valid_to] ASC,
[valid_from] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
表 lu_channel_conns
CREATE TABLE [dbo].[lu_channel_conns](
[channel_id] [int] NOT NULL,
[conn_type_id] [int] NOT NULL,
CONSTRAINT [PK_lu_channel_conns] PRIMARY KEY CLUSTERED
(
[channel_id] ASC,
[conn_type_id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
表 lu_conn_type
CREATE TABLE [dbo].[lu_conn_type](
[conn_type_id] [int] NOT NULL,
[default_key_type_id] [int] NOT NULL,
[master_key_type_id] [int] NOT NULL,
[date_inserted] [datetime] NOT NULL,
CONSTRAINT [PK_lu_conns] PRIMARY KEY CLUSTERED
(
[conn_type_id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
视图 v_source_proc_id_by_group_id
SELECT DISTINCT x.source_proc_id, x.source_proc_group_id
FROM lu_source_proc x INNER JOIN lu_source_proc_group y ON x.source_proc_group_id = y.group_id
将执行一个动态 SQL 语句:
SET @sql_str='SELECT @ret=MAX(o.optin_flag)
FROM optin_channel_'+CAST(@channel_id AS NVARCHAR(100))+' o
INNER HASH JOIN dbo.v_source_proc_id_by_group_id y ON o.source_proc_id=y.source_proc_id AND y.source_proc_group_id=@source_proc_group_id
INNER HASH JOIN profile_conns z ON z.profile_key_id=cast(@profile_key_id AS NVARCHAR(100)) AND z.conn_key_type_id=o.key_type_id AND z.conn_key_id=o.[key_id] AND z.valid_to=''01.01.3000''
INNER HASH JOIN lu_channel_conns x ON x.channel_id=@channel_id AND z.conn_type_id=x.conn_type_id
INNER HASH JOIN lu_conn_type ct ON ct.conn_type_id=x.conn_type_id AND ct.default_key_type_id=o.key_type_id'
SET @param='@channel_id INT, @profile_key_id INT, @source_proc_group_id INT, @ret NVARCHAR(400) OUTPUT'
EXEC sp_executesql @sql_str,@param,@channel_id,@profile_key_id,@source_proc_group_id,@ret OUTPUT
即,这给出:
SELECT @ret=MAX(o.optin_flag) AS optin_flag
FROM optin_channel_1 o
INNER HASH JOIN dbo.v_source_proc_id_by_group_id y
ON o.source_proc_id=y.source_proc_id
AND y.source_proc_group_id=5
INNER HASH JOIN profile_conns z
ON z.profile_key_id=1
AND z.conn_key_type_id=o.key_type_id
AND z.conn_key_id=o.[key_id]
AND z.valid_to='01.01.3000'
INNER HASH JOIN lu_channel_conns x
ON x.channel_id=1
AND z.conn_type_id=x.conn_type_id
INNER HASH JOIN lu_conn_type ct
ON ct.conn_type_id=x.conn_type_id
AND ct.default_key_type_id=o.key_type_id
这些表用于 optin 数据库。 optin_flag
可以是 0 或 1。通过最后一条语句,我想从给定 channel_id= 的
对于具有 optin_channel_1
获取 1 作为 optin_flag
1profile_key_id=1
的用户,当 optin 由属于 source_proc_group_id=5
的进程插入数据库时。我希望这足以理解正在发生的事情。
这是使用CLUSTERED INDEX
的最佳方式吗?或者最好从 profile_conns
上的索引中删除 profile_key_id
并将 z.profile_key_id=1
放在 WHERE
中条款?
可能有更好的方法来优化此选择(不可能更改数据库模式,只能更改索引和修改语句)。
table optin_channel_1
(for each 'channel' there's a dedicated table)
CREATE TABLE [dbo].[optin_channel_1](
[key_id] [bigint] NOT NULL,
[valid_to] [datetime] NOT NULL,
[valid_from] [datetime] NOT NULL,
[key_type_id] [int] NOT NULL,
[optin_flag] [tinyint] NOT NULL,
[source_proc_id] [int] NOT NULL,
[date_inserted] [datetime] NOT NULL
) ON [PRIMARY]
CREATE CLUSTERED INDEX [ix_id] ON [dbo].[optin_channel_1]
(
[key_type_id] ASC,
[key_id] ASC,
[valid_to] ASC,
[valid_from] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
table profile_conns
CREATE TABLE [dbo].[profile_conns](
[profile_key_id] [bigint] NOT NULL,
[valid_to] [datetime] NOT NULL,
[valid_from] [datetime] NOT NULL,
[conn_key_id] [bigint] NOT NULL,
[conn_key_type_id] [int] NOT NULL,
[conn_type_id] [int] NOT NULL,
[source_proc_id] [int] NOT NULL,
[date_inserted] [datetime] NOT NULL
) ON [PRIMARY]
CREATE CLUSTERED INDEX [ix_id] ON [dbo].[profile_conns]
(
[profile_key_id] ASC,
[conn_key_type_id] ASC,
[conn_key_id] ASC,
[valid_to] ASC,
[valid_from] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
table lu_channel_conns
CREATE TABLE [dbo].[lu_channel_conns](
[channel_id] [int] NOT NULL,
[conn_type_id] [int] NOT NULL,
CONSTRAINT [PK_lu_channel_conns] PRIMARY KEY CLUSTERED
(
[channel_id] ASC,
[conn_type_id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
table lu_conn_type
CREATE TABLE [dbo].[lu_conn_type](
[conn_type_id] [int] NOT NULL,
[default_key_type_id] [int] NOT NULL,
[master_key_type_id] [int] NOT NULL,
[date_inserted] [datetime] NOT NULL,
CONSTRAINT [PK_lu_conns] PRIMARY KEY CLUSTERED
(
[conn_type_id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
view v_source_proc_id_by_group_id
SELECT DISTINCT x.source_proc_id, x.source_proc_group_id
FROM lu_source_proc x INNER JOIN lu_source_proc_group y ON x.source_proc_group_id = y.group_id
There's a dynamic SQL statement going to be executed:
SET @sql_str='SELECT @ret=MAX(o.optin_flag)
FROM optin_channel_'+CAST(@channel_id AS NVARCHAR(100))+' o
INNER HASH JOIN dbo.v_source_proc_id_by_group_id y ON o.source_proc_id=y.source_proc_id AND y.source_proc_group_id=@source_proc_group_id
INNER HASH JOIN profile_conns z ON z.profile_key_id=cast(@profile_key_id AS NVARCHAR(100)) AND z.conn_key_type_id=o.key_type_id AND z.conn_key_id=o.[key_id] AND z.valid_to=''01.01.3000''
INNER HASH JOIN lu_channel_conns x ON x.channel_id=@channel_id AND z.conn_type_id=x.conn_type_id
INNER HASH JOIN lu_conn_type ct ON ct.conn_type_id=x.conn_type_id AND ct.default_key_type_id=o.key_type_id'
SET @param='@channel_id INT, @profile_key_id INT, @source_proc_group_id INT, @ret NVARCHAR(400) OUTPUT'
EXEC sp_executesql @sql_str,@param,@channel_id,@profile_key_id,@source_proc_group_id,@ret OUTPUT
I.e. this gives:
SELECT @ret=MAX(o.optin_flag) AS optin_flag
FROM optin_channel_1 o
INNER HASH JOIN dbo.v_source_proc_id_by_group_id y
ON o.source_proc_id=y.source_proc_id
AND y.source_proc_group_id=5
INNER HASH JOIN profile_conns z
ON z.profile_key_id=1
AND z.conn_key_type_id=o.key_type_id
AND z.conn_key_id=o.[key_id]
AND z.valid_to='01.01.3000'
INNER HASH JOIN lu_channel_conns x
ON x.channel_id=1
AND z.conn_type_id=x.conn_type_id
INNER HASH JOIN lu_conn_type ct
ON ct.conn_type_id=x.conn_type_id
AND ct.default_key_type_id=o.key_type_id
These tables are used for an optin database. optin_flag
could be 0 or 1. With the last statement I want to get a 1 as optin_flag
from optin_channel_1
for the given channel_id=1
for user with profile_key_id=1
, when optin was inserted into database by process belonging to source_proc_group_id=5
. I hope this is enough to comprehend what's going on.
Is this the best way to use the CLUSTERED INDEX
'es? Or would it be better to remove profile_key_id
from index on profile_conns
and put z.profile_key_id=1
in a WHERE
clause?
May be there's a much better way for optimizing this select (changes in database schema is not possible, only changes on indexes and modifing statement).
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
如果不知道表的大小以及表中存储的数据类型,就很难衡量。
假设 optin_channel_1 有很多数据并且 profile_cons 有很多数据,我会尝试以下操作:
基本上,如果你的表profile_conns表没有太多数据,我会放置聚集索引在最分散的“过滤器”字段上(我怀疑 profile_key_id )。如果表有大量数据,我会进行哈希/合并连接,并将聚集索引与 optin_channel_1 表的聚集索引进行匹配。
我还会这样重写查询:
查询会这样更改,因为:
综上所述:
Without knowing the size of the tables and the sort of data stored in it them it is difficult to gauge.
Assuming optin_channel_1 has a lot of data and profile_cons has a lot of data I would try the following:
Basically, if your table profile_conns table has not much data, I would put the clustered index on the most fragmented "filter" field (I suspect profile_key_id). If the table has a lot of data I would aim for a hash/merge join and match the clustered index with the clustered index of the optin_channel_1 table.
I would also rewrite the query as such:
The query changed this way because:
So as summary: