-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathsqlite.txt
More file actions
165 lines (127 loc) · 7.33 KB
/
sqlite.txt
File metadata and controls
165 lines (127 loc) · 7.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
## Notes on sqlite
brew install sqlite
sqlite3 command actually in anaconda
And apparently RSQLite and sqldb packages for R also include it
---
In R:
library(RSQLite)
Good tutorial at
https://web.archive.org/web/20141006200249/https://sandymuspratt.blogspot.com/2012/11/r-and-sqlite-part-1.html
con <- dbConnect(SQLite(), dbname="Test.sqlite")
dbWriteTable(con, "mtcars", mtcars) # dump data frame into database
dbListTables(con)
dbListFields(con, "mtcars")
x <- dbReadTable(con, "mtcars") # read the whole thing
dbRemoveTable(con, "mtcars")
dbDisconnect(con)
Software Carpentry example
download.file("https://files.software-carpentry.org/survey.db", "survey.db")
con <- dbConnect(SQLite(), dbname="survey.db")
dbListTables(con) # Person, Site, Survey, Visited
person <- fetch(rs <- dbSendQuery(con, "SELECT * FROM Person")) # read all of Person table
person2 <- dbReadTable(con, "Survey") # another way to read the whole thing
x <- fetch(rs <- dbSendQuery(con, "SELECT name FROM Site"))
# just distinct values
fetch(rs <- dbSendQuery(con, "SELECT DISTINCT quant FROM Survey"))
# distinct values for pairs of columns
fetch(rs <- dbSendQuery(con, "SELECT DISTINCT taken, quant FROM Survey"))
# ordered results
fetch(rs <- dbSendQuery(con, "SELECT * From Person ORDER BY ident"))
fetch(rs <- dbSendQuery(con, "SELECT * From Person ORDER BY ident DESC"))
fetch(rs <- dbSendQuery(con, "SELECT * FROM Survey ORDER BY taken ASC, person DESC"))
# filtering
fetch(rs <- dbSendQuery(con, "SELECT * FROM Visited WHERE site='DR-1'"))
fetch(rs <- dbSendQuery(con, "SELECT * FROM Visited WHERE site='DR-1' AND dated<'1930-01-01'"))
# Note: SQLite doesn't have a date type but either uses text or numbers
fetch(rs <- dbSendQuery(con, "SELECT * FROM Survey WHERE person='lake' OR person='roe'"))
fetch(rs <- dbSendQuery(con, "SELECT * FROM Survey WHERE person IN ('lake','roe')"))
fetch(rs <- dbSendQuery(con, "SELECT * FROM Survey WHERE quant='sal' AND (person='lake' OR person='roe')"))
fetch(rs <- dbSendQuery(con, "SELECT * FROM Survey WHERE quant='sal' AND (person='lake' OR person='roe')"))
fetch(rs <- dbSendQuery(con, "SELECT DISTINCT person, quant FROM Survey WHERE person IN ('lake', 'roe')"))
# crude regex
fetch(rs <- dbSendQuery(con, "SELECT * FROM Visited WHERE site LIKE 'DR%'"))
dbGetQuery(con, "SELECT DISTINCT quant FROM Survey")
dbGetQuery(con, "SELECT * FROM Survey where quant='sal' AND (reading < 0 OR reading > 1)")
# calculating new values
### mutliply rad by 1.05 and round to 2 digits
dbGetQuery(con, "SELECT taken, round(1.05 * reading, 2) FROM Survey WHERE quant='rad'")
### string concatenation
dbGetQuery(con, "SELECT personal || ' ' || family FROM Person")
dbGetQuery(con, "SELECT personal || ' ' || family AS name FROM Person")
dbGetQuery(con, "SELECT reading/100 FROM Survey WHERE quant='sal' AND person='roe'")
# union: combine two queries
dbGetQuery(con, paste("SELECT taken,person,reading FROM Survey WHERE person!='roe' AND quant='sal'",
"UNION SELECT taken,person,reading/100 FROM Survey WHERE person='roe'"))
# some string functions
dbGetQuery(con, "SELECT DISTINCT substr(site, 0, instr(site, '-')) AS 'Major site' FROM Visited")
# NULL values
dbGetQuery(con, "SELECT * FROM Visited WHERE dated IS NULL")
dbGetQuery(con, "SELECT * FROM Visited WHERE dated IS NOT NULL")
dbGetQuery(con, paste("SELECT taken,person,reading FROM Survey WHERE (person IS NULL OR person!='roe') AND quant='sal'",
"UNION SELECT taken,person,reading/100 FROM Survey WHERE person='roe'"))
# aggregation
dbGetQuery(con, "SELECT min(dated) FROM Visited")
dbGetQuery(con, "SELECT max(dated) FROM Visited")
dbGetQuery(con, "SELECT avg(reading) FROM Survey where quant='sal'")
dbGetQuery(con, "SELECT count(reading) FROM Survey where quant='sal'")
dbGetQuery(con, "SELECT sum(reading) FROM Survey where quant='sal'")
dbGetQuery(con, "SELECT person, avg(reading) FROM Survey WHERE quant='sal' GROUP BY person")
dbGetQuery(con, "SELECT quant, person, avg(reading) FROM Survey GROUP BY person, quant ORDER BY quant, person")
dbGetQuery(con, "SELECT quant, person, avg(reading) FROM Survey WHERE person IS NOT NULL GROUP BY person, quant ORDER BY quant, person")
# group_concat
dbGetQuery(con, "SELECT group_concat(personal || ' ' || family, ', ') AS names FROM Person")
dbGetQuery(con, "SELECT * FROM Site JOIN Visited ON Site.name = Visited.site")
dbGetQuery(con, "SELECT Site.lat, Site.long, Visited.dated FROM Site JOIN Visited WHERE Site.name = Visited.site")
# joins
dbGetQuery(con, "SELECT * FROM Site JOIN Visited") # full cross-product
dbGetQuery(con, "SELECT Site.lat, Site.long, Visited.dated FROM Site JOIN Visited WHERE Site.name = Visited.site")
dbGetQuery(con, paste("SELECT Site.lat, Site.long, Visited.dated, Survey.quant, Survey.reading",
"FROM Site",
"JOIN Visited ON Site.name = Visited.site",
"JOIN Survey ON Visited.ident = Survey.taken",
"WHERE Visited.dated IS NOT NULL"))
dbGetQuery(con, paste("SELECT * FROM Visited",
"JOIN Survey ON Visited.ident = Survey.taken",
"WHERE site='DR-1' AND quant='rad'"))
# SQLite automatically numbers records with 'rowid'
dbGetQuery(con, "SELECT rowid,* FROM Person")
# list all fields in each table
sapply(dbListTables(con), function(tab) dbListFields(con, tab))
# last challenge at https://swcarpentry.github.io/sql-novice-survey/07-join/
dbGetQuery(con, paste("SELECT Visited.site, Site.lat, Site.long, Visited.dated, Person.personal, Person.family, Survey.quant, Survey.reading",
"FROM Site JOIN Visited ON Site.name = Visited.site",
"JOIN Survey ON Survey.taken = Visited.ident",
"JOIN Person ON Person.ident = Survey.person",
"WHERE Visited.dated IS NOT NULL",
"ORDER BY Visited.dated"))
# create a new table
dbGetQuery(con, "CREATE TABLE JustLatLong(lat text, long text)")
# fill with values from Site
dbGetQuery(con, "INSERT INTO JustLatLong SELECT lat, long FROM Site")
# insert some other values
dbGetQuery(con, "INSERT INTO JustLatLong values(43.106,-89.425)")
# update
dbGetQuery(con, "UPDATE Site SET lat=-47.87, long=-122.40 WHERE name='MSK-4'")
# delete a record
dbGetQuery(con, "DELETE FROM Person WHERE ident='danforth'")
# replace NULLs with 'unknown'
dbGetQuery(con, "UPDATE Survey SET person='unknown' WHERE person IS NULL")
dbDisconnect(con)
---
Note that = and == are the same, in a WHERE clause
---
# list indexes
dbGetQuery(db, "select * from sqlite_master where type='index'")
---
# drop index "chr_pos"
dbGetQuery(db, "drop index chr_pos")
---
# delete records
dbGetQuery(db, "DELETE FROM variants WHERE type='indel' OR type='sv'")
---
# create an index
dbGetQuery(db, "CREATE INDEX chr_pos ON variants(chr, pos)")
dbGetQuery(db, "CREATE INDEX snp_id ON variants(snp_id)")
note that for large DB these:
- make queries *way* faster (like 5 sec -> 1 ms)
- hugely inflate the .sqlite file (like +700 MB)