返回介绍

Using SQLite3

发布于 2025-02-25 23:43:38 字数 7857 浏览 0 评论 0 收藏 0

Working example dataset

This data contains the survival time after receiving a heart transplant, the age of the patient and whether or not the survival time was censored

  • Number of Observations - 69
  • Number of Variables - 3

Variable name definitions:: * death - Days after surgery until death * age - age at the time of surgery * censored - indicates if an observation is censored. 1 is uncensored

import statsmodels.api as sm
heart = sm.datasets.heart.load_pandas().data
heart.take(np.random.choice(len(heart), 6))
 survivalcensorsage
66110023.7
241367048.6
30897146.1
6713028.9
49499052.2
35322148.1
import sqlite3
conn = sqlite3.connect('heart.db')

Creating and populating a table

c = conn.cursor()

c.execute('''CREATE TABLE IF NOT EXISTS transplant
             (survival integer, censors integer, age real)''')

c.executemany("insert into transplant(survival, censors, age) values (?, ?, ?)", heart.values);

SQL queries

SQL Queries take the form

select (distinct) ... from ... (limit ...)
where ...
groupby ..
order by ...

where most of the query apart from the select ... from ... are optional.

Selecting all columns, first 10 rows

for row in c.execute('''select * from transplant limit 5;'''):
    print row
(15, 1, 54.3)
(3, 1, 40.4)
(624, 1, 51.0)
(46, 1, 42.5)
(127, 1, 48.0)

Using where to filter rows

# only find censored data for subjects < 40 years old
for row in c.execute('''
select * from transplant
where censors=0 and age < 40 limit 5;'''):
    print row
(1775, 0, 33.3)
(1106, 0, 36.8)
(875, 0, 38.9)
(815, 0, 32.7)
(592, 0, 26.7)

Using SQL functions

for row in c.execute('''select count(*), avg(age) from transplant where censors=0 and age < 40;'''):
    print row
(9, 31.43333333333333)

Using groupby to find number of cnesored and uncensored subjects and thier average age

query = '''
select censors, count(*), avg(age) from transplant
group by censors;
'''
for row in c.execute(query):
    print row
(0, 24, 41.729166666666664)
(1, 45, 48.484444444444456)

Using having to filter grouped results

query = '''
select censors, count(*), avg(age) from transplant
group by censors
having avg(age) < 45;
'''
for row in c.execute(query):
    print row
(0, 24, 41.729166666666664)

Using order by to sort results

query = '''
select * from transplant
where age < 40
order by age desc;
'''
for row in c.execute(query):
    print row
(875, 0, 38.9)
(1106, 0, 36.8)
(44, 1, 36.2)
(1, 0, 35.2)
(1775, 0, 33.3)
(815, 0, 32.7)
(12, 1, 29.2)
(13, 0, 28.9)
(592, 0, 26.7)
(167, 0, 26.7)
(110, 0, 23.7)
(228, 1, 19.7)

Reading into a numpy structured array

result = c.execute(query).fetchall()
arr = np.fromiter(result, dtype='i4,i4,f4')
arr.dtype.names = ['survival', 'censors', 'age']
print '\n'.join(map(str, arr))
(875, 0, 38.900001525878906)
(1106, 0, 36.79999923706055)
(44, 1, 36.20000076293945)
(1, 0, 35.20000076293945)
(1775, 0, 33.29999923706055)
(815, 0, 32.70000076293945)
(12, 1, 29.200000762939453)
(13, 0, 28.899999618530273)
(592, 0, 26.700000762939453)
(167, 0, 26.700000762939453)
(110, 0, 23.700000762939453)
(228, 1, 19.700000762939453)

Reading into a numpy regular array

from itertools import chain
result = c.execute(query).fetchall()
arr = np.fromiter(chain.from_iterable(result), dtype=np.float)
print arr.reshape(-1,3)
[[  8.7500e+02   0.0000e+00   3.8900e+01]
 [  1.1060e+03   0.0000e+00   3.6800e+01]
 [  4.4000e+01   1.0000e+00   3.6200e+01]
 [  1.0000e+00   0.0000e+00   3.5200e+01]
 [  1.7750e+03   0.0000e+00   3.3300e+01]
 [  8.1500e+02   0.0000e+00   3.2700e+01]
 [  1.2000e+01   1.0000e+00   2.9200e+01]
 [  1.3000e+01   0.0000e+00   2.8900e+01]
 [  5.9200e+02   0.0000e+00   2.6700e+01]
 [  1.6700e+02   0.0000e+00   2.6700e+01]
 [  1.1000e+02   0.0000e+00   2.3700e+01]
 [  2.2800e+02   1.0000e+00   1.9700e+01]]

Working wiht multiple tables in SQL

We will consturct a new database with 2 tables to illustrate the concept of joins.

conn1 = sqlite3.connect('samples.db')
c1 = conn1.cursor()

c1.execute(
'''
CREATE TABLE IF NOT EXISTS t1(
  ID TEXT,
  Name TEXT,
  Value Real);
''')

c1.execute('''
CREATE TABLE IF NOT EXISTS t2(
  ID TEXT,
  Name TEXT,
  Value Real,
  Age INTEGER);
''');

from string import ascii_lowercase
for i in range(5):
    c1.execute('''insert into t1(ID, Name, Value) values (%d, '%s', %.2f)''' % (i, ascii_lowercase[i], i*i));
    c1.execute('''insert into t2(ID, Name, Value, Age) values (%d, '%s', %.2f, %d)''' % (i*2, ascii_lowercase[i*2], i*i+5, 10*i));

Cartesian product

# Without specifiying a join, the result is all possible combinations
query = '''
select t1.ID, t2.ID from t1, t2;
'''
for row in c1.execute(query):
    print row
(u'0', u'0')
(u'0', u'2')
(u'0', u'4')
(u'0', u'6')
(u'0', u'8')
(u'1', u'0')
(u'1', u'2')
(u'1', u'4')
(u'1', u'6')
(u'1', u'8')
(u'2', u'0')
(u'2', u'2')
(u'2', u'4')
(u'2', u'6')
(u'2', u'8')
(u'3', u'0')
(u'3', u'2')
(u'3', u'4')
(u'3', u'6')
(u'3', u'8')
(u'4', u'0')
(u'4', u'2')
(u'4', u'4')
(u'4', u'6')
(u'4', u'8')

Inner joins

# Inner join (intersection)
query = '''
select t1.ID, t2.ID, t1.value, t2.value, t1.value * t2.value from t1, t2
where t1.ID = t2.ID;
'''
for row in c1.execute(query):
    print row
(u'0', u'0', 0.0, 5.0, 0.0)
(u'2', u'2', 4.0, 6.0, 24.0)
(u'4', u'4', 16.0, 9.0, 144.0)
# left join keeps all values from the left table (t2)
# and values from the right (t1) where there is a match
query = '''
select t1.id, t2.ID, t1.value, t2.value from t2 left join t1 on t1.ID = t2.ID
'''
for row in c1.execute(query):
    print row
(u'0', u'0', 0.0, 5.0)
(u'2', u'2', 4.0, 6.0)
(u'4', u'4', 16.0, 9.0)
(None, u'6', None, 14.0)
(None, u'8', None, 21.0)
# same join but we swtich left and right tables
query = '''
select t1.ID, t2.ID, t1.value, t2.value from t1 left join t2 on t1.ID = t2.ID
'''
for row in c1.execute(query):
    print row
(u'0', u'0', 0.0, 5.0)
(u'1', None, 1.0, None)
(u'2', u'2', 4.0, 6.0)
(u'3', None, 9.0, None)
(u'4', u'4', 16.0, 9.0)

Self-joins

# we can join a table to itself by using aliases
# lets add a few more rows to t1 which may have the same id and name but different values

for i in range(5):
    c1.execute('''insert into t1(ID, Name, Value) values (%d, '%s', %.2f)''' % (i, ascii_lowercase[i], i*i*i));

for row in c1.execute('select * from t1;'):
    print row
(u'0', u'a', 0.0)
(u'1', u'b', 1.0)
(u'2', u'c', 4.0)
(u'3', u'd', 9.0)
(u'4', u'e', 16.0)
(u'0', u'a', 0.0)
(u'1', u'b', 1.0)
(u'2', u'c', 8.0)
(u'3', u'd', 27.0)
(u'4', u'e', 64.0)
# Now use a self-join to find paired values for the same ID and name

query = '''
select t1a.ID, t1a.Name, t1a.value, t1b.value from t1 as t1a, t1 as t1b
where t1a.Name = t1b.Name and t1a.Value < t1b.Value
order by t1a.ID ASC;
'''
for row in c1.execute(query):
    print row
(u'2', u'c', 4.0, 8.0)
(u'3', u'd', 9.0, 27.0)
(u'4', u'e', 16.0, 64.0)

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
    我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
    原文