sqrrl
is a small collection of utility functions that help build text-based SQL queries in an R-style native-feeling and functional manner.
Unlike other packages that build SQL queries using an object-oriented style, sqrrl
provides small functions that produce SQL snippets and can be linked together to compose SQL queries. The result is that the code to produce the SQL statement reads much like the SQL statement iteself. On the other hand, sqrrl
doesn’t know anything about your database and can’t help you out with completions, etc.
> SELECT()
SELECT *
> SELECT('col1', 'col2', 'col3')
SELECT col1, col2, col3
> SELECT(newcol = 'col1', avg_col1 = 'mean(col1)')
SELECT col1 AS newcol, mean(col1) AS avg_col1
> SELECT(letters[1:3], 't2' = letters[4:6])
SELECT a, b, c, t2.d, t2.e, t2.f
> SELECT(a = 'apple', b = 'banana', c = 'cherry')
SELECT apple AS a, banana AS b, cherry AS c
> SELECT('t1' = c(a = 'apple', b = 'banana'), c = 'cherry')
SELECT t1.apple AS a, t1.banana AS b, cherry AS c
> SELECT('t1' = c(a = 'apple', b = 'banana'), c = 'cherry', 't2' = c(d = 'dragon_fruit'))
SELECT t1.apple AS a, t1.banana AS b, cherry AS c, t2.dragon_fruit AS d
> FROM('table1')
FROM table1
> FROM('table1', 'table2')
FROM table1 , table2
> FROM(t1 = 'table1', t2 = 'table2', 'table3')
FROM table1 t1, table2 t2, table3
> WHERE('col1 IS NOT NULL')
WHERE col1 IS NOT NULL
> WHERE(cond = TRUE, 'col1 = 2', 'col2 >= 10')
WHERE col1 = 2 AND col2 >= 10
> WHERE(cond = FALSE, 'col1 = 2', 'col2 >= 10')
""
> 'id' %IN% 1:4
id IN (1, 2, 3, 4)
> 'id' %IN% letters[1:3]
id IN ("a", "b", "c")
> # Note: if left-hand-side length == 1, then LHS is unquoted (for subqueries)
> 'id' %IN% paste(SELECT('id'), FROM('other_table'))
id IN (SELECT id FROM other_table )
> 'in' %IN% quotes(letters[1])
`in` IN ("a")
> 'text_col' %LIKE% 'Prefix%'
text_col LIKE("Prefix%")
> 'text_col' %LIKE% c('Prefix 1%', 'Prefix 2%')
(text_col LIKE("Prefix 1%") OR text_col LIKE("Prefix 2%"))
> eq(id = 4)
id=4
> neq(id = 4)
id!=4
> lt(id = 4)
id<4
> leq(id = 4)
id<=4
> gt(id = 4)
id>4
> geq(id = 4)
id>=4
sqrrl
also provides a simple wrapper for https://github.com/andialbrecht/sqlparse, a Python package for formatting SQL queries. sqlparse
can be installed via pip install --upgrade sqlparse
, thereafter making available the system command sqlformat
.
sqrrl::sqlformat()
pretty-prints SQL queries, such as the one above.
> example <- sqlformat(example_query, header = 'A Beautifully Formatted Example Query')
> cat(example)
> # Using one ID
> JOIN(left_ref = 'left_tbl', right_tbls = 'right_tbl', on = 'id')
JOIN right_tbl USING (id)
> LEFT_JOIN('l', c('r' = 'right_tbl'), 'id')
LEFT JOIN right_tbl r USING (id)
> RIGHT_JOIN('l', c('r' = 'right_tbl'), 'id', prefer_using = FALSE)
RIGHT JOIN right_tbl r ON l.id=r.id
> # Join on multiple columns, with different names on left and right
> JOIN(type = 'natural right', 'left_tbl', 'right_tbl', c('left.col1' = 'right.col1', 'id2'))
NATURAL RIGHT JOIN right_tbl ON (left_tbl.`left.col1`=right_tbl.`right.col1` AND left_tbl.id2=right_tbl.id2)
> # Join multiple tables on same column
> INNER_JOIN('left_tbl', c('right_1', 'right_2'), 'id_col')
INNER JOIN (right_1, right_2) USING (id_col)
> # Join multiple tables on different columns
> OUTER_JOIN('l', c(r1 = 'right_1', r2 = 'right_2'), list('col1', 'col2'))
OUTER JOIN (right_1 r1, right_2 r2) ON (l.col1=r1.col1 AND l.col2=r2.col2)
> # Join multiple tables on different coluns with different column names
> JOIN('l', c(r1 = 'right_1', r2 = 'right_2'), list(c(right_1_id = 'id', c(right_2_id = 'id'))))
JOIN (right_1 r1, right_2 r2) ON (l.right_1_id=r1.id AND l.right_2_id=r1.id AND l.right_1_id=r2.id AND l.right_2_id=r2.id)
> iris_example <- iris[c(1, 51, 101), ]
> # Insert all rows & columns from a data.frame
> INSERT_INTO_VALUES('iris', iris_example)
INSERT INTO iris (`Sepal.Length`, `Sepal.Width`, `Petal.Length`, `Petal.Width`, Species) VALUES (5.1, 3.5, 1.4, 0.2, "setosa"), (7, 3.2, 4.7, 1.4, "versicolor"), (6.3, 3.3, 6, 2.5, "virginica")
> # Insert select columns from a data.frame
> INSERT_INTO_VALUES('iris', iris_example, c('Petal.Length', 'Petal.Width', 'Species'))
INSERT INTO iris (`Petal.Length`, `Petal.Width`, Species) VALUES (1.4, 0.2, "setosa"), (4.7, 1.4, "versicolor"), (6, 2.5, "virginica")
> # Insert named vector
> INSERT_INTO_VALUES('iris', c('Petal.Length' = 1.9, 'Petal.Width' = 0.2, 'Species' = 'setosa'))
INSERT INTO iris (`Petal.Length`, `Petal.Width`, Species) VALUES (1.9, 0.2, "setosa")
> # Insert subset of named vector
> INSERT_INTO_VALUES('iris', c('Petal.Length' = 1.9, 'Petal.Width' = 0.2, 'Species' = 'setosa'),
+ cols = c('Petal.Width', 'Species'))
INSERT INTO iris (`Petal.Width`, Species) VALUES (0.2, "setosa")
> # Insert just vector of mixed type without column names
> INSERT_INTO_VALUES('iris', c(6.5, 3.2, 5.1, 2, 'virginica'))
INSERT INTO iris VALUES (6.5, 3.2, 5.1, 2, "virginica")
> UPDATE('t1', col1 = 'a')
UPDATE t1 SET col1="a"
> UPDATE('t1', col1 = ~col2 * 1.25)
UPDATE t1 SET col1=col2 * 1.25
> UPDATE('t1', col1 = 'a', col2 = 42, .where = 'id' %IN% 1:5)
UPDATE t1 SET col1="a", col2=42 WHERE id IN (1, 2, 3, 4, 5)
> # Update provides .where argument, but it's easier to add this outside UPDATE()
> UPDATE('t1', col1 = 'a', col2 = 42) %+% WHERE('id' %IN% 1:5)
UPDATE t1 SET col1="a", col2=42 WHERE id IN (1, 2, 3, 4, 5)
UPDATE iris SET some_column=1, some_other_col="high" WHERE another_col=2
UPDATE t SET id=id + 1 ORDER BY id DESC
> # Multiple tables
> UPDATE(c('items', 'month'), items.price = ~month.price, .where = eq(items.id = ~month.id))
UPDATE items , MONTH SET items.price=month.price WHERE items.id=month.id
UPDATE items , MONTH SET items.price=month.price WHERE items.id=month.id