Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Query.jl v1.1.0 Release Notes
* Add @pivot_wider and @pivot_longer

# Query.jl v0.12.2 Release Notes
* Fix some bugs in the @select macro

Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
[compat]
IterableTables = "0.8.2, 0.9, 0.10, 0.11, 1"
julia = "1.10"
QueryOperators = "0.9.1"
QueryOperators = "1"
DataValues = "0.4.4"
MacroTools = "0.4.4, 0.5"

Expand Down
75 changes: 75 additions & 0 deletions docs/src/standalonequerycommands.md
Original file line number Diff line number Diff line change
Expand Up @@ -530,3 +530,78 @@ println(q)
2 │ 2 Unknown
3 │ 0 Three
```

## The `@pivot_longer` command

The `@pivot_longer` command reshapes data from wide format to long format. Each row in the source is expanded into one output row per pivot column. Non-pivot columns are retained as-is, and two new columns are added: `:variable` (holding the original column name as a `Symbol`) and `:value` (holding the cell value).

Columns to pivot are selected with the same rich selector syntax as `@select`:

| Syntax | Meaning |
|---------------------------|-----------------------------------------------------|
| `:col` | Include column by name |
| `startswith("prefix")` | Include columns whose name starts with `"prefix"` |
| `endswith("suffix")` | Include columns whose name ends with `"suffix"` |
| `occursin("sub")` | Include columns whose name contains `"sub"` |
| `!(startswith("prefix"))` | Exclude columns whose name starts with `"prefix"` |
| `-(startswith("prefix"))` | Same as above |
| `-:col` | Exclude column by name |
| `n` (integer) | Include column at position `n` |
| `:from : :to` | Include a name range (inclusive) |
| `a:b` (integers) | Include a positional range (inclusive) |

When only exclusion selectors are given (all starting with `-` or `!`), the starting set is all columns and the exclusions are removed.

The names of the output columns can be customised with the `names_to` and `values_to` keyword arguments. Both accept a `Symbol` and default to `:variable` and `:value` respectively.

#### Examples

```julia
using Query, DataFrames

df = DataFrame(year=[2017,2018], US=[1,3], EU=[2,4])

# Explicit column names
result = df |> @pivot_longer(:US, :EU) |> DataFrame
# 4×3 DataFrame: year | variable | value

# Custom output column names
result = df |> @pivot_longer(:US, :EU, names_to=:country, values_to=:sales) |> DataFrame
# 4×3 DataFrame: year | country | sales

# Predicate — pivot all columns starting with "U"
result = df |> @pivot_longer(startswith("U")) |> DataFrame

# Predicate with exclusion — pivot wk* columns except wk_total
df2 = DataFrame(id=[1,2], wk1=[10,20], wk2=[30,40], wk_total=[40,60])
result = df2 |> @pivot_longer(startswith("wk"), -:wk_total) |> DataFrame
# pivots :wk1 and :wk2 only

# Negated predicate — pivot everything except id columns
result = df2 |> @pivot_longer(!(startswith("id"))) |> DataFrame
```

## The `@pivot_wider` command

The `@pivot_wider` command reshapes data from long format to wide format. It has the form `source |> @pivot_wider(names_from, values_from)`, where `names_from` is the quoted name of the column whose values become new column names, and `values_from` is the quoted name of the column whose values populate those new columns. All other columns are used as identifier columns. Absent combinations are represented as `DataValues.DataValue{T}()` (NA).

#### Example

```julia
using Query, DataFrames

long = DataFrame(
year = [2017, 2017, 2018, 2018],
country = [:US, :EU, :US, :EU],
value = [1, 2, 3, 4]
)

result = long |> @pivot_wider(:country, :value) |> DataFrame

# 2×3 DataFrame
# Row │ year US EU
# │ Int64 Union{Missing, Int64} Union{Missing, Int64}
# ─────┼──────────────────────────────────────────────────
# 1 │ 2017 1 2
# 2 │ 2018 3 4
```
3 changes: 2 additions & 1 deletion src/Query.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ using QueryOperators
export @from, @query, @count, Grouping, key

export @map, @filter, @groupby, @orderby, @orderby_descending, @unique,
@thenby, @thenby_descending, @groupjoin, @join, @mapmany, @take, @drop
@thenby, @thenby_descending, @groupjoin, @join, @mapmany, @take, @drop,
@pivot_longer, @pivot_wider

export @select, @rename, @mutate, @disallowna, @dropna, @replacena

Expand Down
160 changes: 160 additions & 0 deletions src/standalone_query_macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,163 @@ macro unique(f)
return :( i -> QueryOperators.unique(QueryOperators.query(i), $(esc(f_as_anonym_func)), $(esc(q)))) |>
helper_namedtuples_replacement
end

# Decide whether a macro argument is written like a column selector, as opposed
# to an expression naming the data source. Used by `@pivot_longer` to tell the
# pipe form (selectors only) from the direct form (source first).
#
# Recognised selector shapes:
#   * a quoted name (`:col`) or an integer literal
#   * a one-argument call to `-`, `!`, `startswith`, `endswith` or `occursin`
#   * a two-operand call to `:` (e.g. `1:3` or `:a : :b`)
#   * the literal call `everything()`
# Keyword-style arguments (`name = value`) are explicitly NOT selectors.
function _is_pivot_selector(arg)
    # Quoted names and integer literals are selectors on their own.
    (arg isa QuoteNode || arg isa Int) && return true

    # Every remaining selector form is an expression.
    arg isa Expr || return false

    # `names_to = :x` / `values_to = :x` are options, not selectors.
    arg.head in (:(=), :kw) && return false

    if arg.head == :call && !isempty(arg.args)
        callee = arg.args[1]
        nargs = length(arg.args)
        # Unary forms: negation (`-x`, `!x`) and the string predicates.
        if nargs == 2 && callee in (:-, :!, :startswith, :endswith, :occursin)
            return true
        end
        # Range form: `a:b` is a call to `:` with two operands.
        if nargs == 3 && callee == Symbol(":")
            return true
        end
    end

    # Last recognised form: the literal call `everything()`.
    return string(arg) == "everything()"
end

# Split a predicate call `pred("literal")` into `(pred, "literal")`, where `pred`
# is one of `startswith`, `endswith` or `occursin`. Returns `nothing` when `ex`
# is not such a call; raises an error when the argument is not a string literal.
function _pivot_predicate_parts(ex)
    ex isa Expr || return nothing
    (ex.head == :call && length(ex.args) == 2) || return nothing
    fn, str = ex.args
    fn ∈ (:startswith, :endswith, :occursin) || return nothing
    str isa AbstractString || error("@pivot_longer: argument to $fn must be a string literal")
    return (fn, str)
end

# Translate one selector argument (as received by the macro, i.e. an AST
# fragment) into an `(operation, payload)` tuple:
#
#   :col                 -> (:include_name, :col)
#   n > 0 / n < 0        -> (:include_position, n) / (:exclude_position, -n)
#   everything()         -> (:include_all, :_)
#   -:col                -> (:exclude_name, :col)
#   -(pred("s")), !(pred("s")) -> (:exclude_<pred>, :s)
#   pred("s")            -> (:include_<pred>, :s)
#   a:b (integers)       -> (:include_range_idx, (a, b))
#   :a : :b              -> (:include_range, (:a, :b))
#
# Anything else (including the integer 0) raises an error.
function _pivot_selector_to_instruction(arg)
    if arg isa QuoteNode
        return (:include_name, arg.value)
    elseif arg isa Int
        # The sign of an integer literal picks inclusion or exclusion by position.
        arg > 0 && return (:include_position, arg)
        arg < 0 && return (:exclude_position, -arg)
    elseif arg isa Expr
        string(arg) == "everything()" && return (:include_all, :_)

        if arg.head == :call && length(arg.args) == 2
            op, operand = arg.args
            if op == :- || op == :!
                # Only the `-` form accepts a bare quoted name.
                if op == :- && operand isa QuoteNode
                    return (:exclude_name, operand.value)
                end
                parts = _pivot_predicate_parts(operand)
                if parts !== nothing
                    return (Symbol("exclude_", parts[1]), Symbol(parts[2]))
                end
            else
                parts = _pivot_predicate_parts(arg)
                if parts !== nothing
                    return (Symbol("include_", parts[1]), Symbol(parts[2]))
                end
            end
        elseif arg.head == :call && length(arg.args) == 3 && arg.args[1] == Symbol(":")
            lo, hi = arg.args[2], arg.args[3]
            if lo isa Int && hi isa Int
                return (:include_range_idx, (lo, hi))
            elseif lo isa QuoteNode && hi isa QuoteNode
                return (:include_range, (lo.value, hi.value))
            end
        end
    end
    error("@pivot_longer: unrecognised selector argument: $arg")
end

# Tell whether a macro argument is one of the supported options, i.e. an
# expression of the form `names_to = value` or `values_to = value`
# (either an `=` expression or a `kw` node).
function _is_pivot_kwarg(arg)
    arg isa Expr || return false
    arg.head in (:(=), :kw) || return false
    length(arg.args) == 2 || return false
    return arg.args[1] in (:names_to, :values_to)
end

# Reshape a table from wide to long format.
#
# Two call shapes are accepted:
#   source |> @pivot_longer(selectors...)     pipe form, expands to `i -> ...`
#   @pivot_longer(source, selectors...)       direct form, expands to the call
#
# `selectors` are the column selectors understood by
# `_pivot_selector_to_instruction`. The options `names_to = ...` and
# `values_to = ...` may appear anywhere in the argument list (including first,
# or after a `;`) and are forwarded as keyword arguments to
# `QueryOperators.pivot_longer`.
#
# Errors are raised at macro-expansion time when no column selector is given or
# when a selector is not recognised.
macro pivot_longer(args...)
    # Keywords written after `;` arrive as a leading `Expr(:parameters, ...)`.
    # Move them behind the positional arguments so they are treated like
    # keywords written inline.
    flat = args
    if !isempty(flat) && flat[1] isa Expr && flat[1].head == :parameters
        flat = (flat[2:end]..., flat[1].args...)
    end
    isempty(flat) && error("@pivot_longer requires at least one column selector argument")

    # Pipe form: the first argument is a selector or an option.
    # Direct form: the first argument is the data source.
    # An option must count as "pipe form" too, otherwise
    # `df |> @pivot_longer(names_to=:c, :US)` would take the option as the source.
    piped = _is_pivot_selector(flat[1]) || _is_pivot_kwarg(flat[1])
    selector_args = piped ? flat : flat[2:end]
    isempty(selector_args) && error("@pivot_longer requires at least one column selector")

    # Separate options from column selectors.
    col_selectors = filter(!_is_pivot_kwarg, selector_args)
    isempty(col_selectors) && error("@pivot_longer requires at least one column selector")

    # Option values are user expressions, so they are escaped into caller scope.
    keywords = Expr[
        Expr(:kw, kw.args[1], esc(kw.args[2])) for kw in selector_args if _is_pivot_kwarg(kw)
    ]

    # Selector instructions are computed now, at macro-expansion time, and
    # embedded in the generated code as a constant tuple.
    instructions = Tuple(_pivot_selector_to_instruction(a) for a in col_selectors)

    # pivot_longer([; keywords...], src, resolved_columns)
    call = Expr(:call, :(QueryOperators.pivot_longer))
    isempty(keywords) || push!(call.args, Expr(:parameters, keywords...))
    push!(
        call.args,
        :src,
        :(QueryOperators._resolve_pivot_cols(eltype(src), Val($instructions))),
    )

    # Bind the queried source once: it is needed both as the data and for its
    # element type, and the user's source expression must not be evaluated twice.
    source = piped ? :i : esc(flat[1])
    body = quote
        let src = QueryOperators.query($source)
            $call
        end
    end

    return piped ? :(i -> $body) : body
end

# Direct form: `@pivot_wider(source, names_from, values_from)`.
# Expands to `QueryOperators.pivot_wider(QueryOperators.query(source), names_from, values_from)`;
# all three arguments are escaped so they are evaluated in the caller's scope.
macro pivot_wider(source, names_from, values_from)
    table = :(QueryOperators.query($(esc(source))))
    name_col = esc(names_from)
    value_col = esc(values_from)
    return :(QueryOperators.pivot_wider($table, $name_col, $value_col))
end

# Pipe form: `source |> @pivot_wider(names_from, values_from)`.
# Expands to a one-argument anonymous function that queries its argument and
# passes it to `QueryOperators.pivot_wider` together with the two (escaped)
# column arguments.
macro pivot_wider(names_from, values_from)
    name_col = esc(names_from)
    value_col = esc(values_from)
    return :(i -> QueryOperators.pivot_wider(QueryOperators.query(i), $name_col, $value_col))
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ include("test_core.jl")
include("test_dplyr-syntax.jl")
include("test_pipesyntax.jl")
include("test_macros.jl")
include("test_standalone.jl")

@run_package_tests

Expand Down
116 changes: 116 additions & 0 deletions test/test_standalone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,119 @@ end
@test df |> @unique() |> collect == [(a=1,b=3.), (a=2,b=3.)]
@test df |> @unique(_.b) |> collect == [(a=1,b=3.)]
end

@testitem "@pivot_longer operator" begin
    using DataFrames

    wide = DataFrame(year=[2017,2018], US=[1,3], EU=[2,4])

    # Pipe form: each source row expands to one output row per pivoted column.
    piped = wide |> @pivot_longer(:US, :EU) |> collect
    expected_rows = [
        (year=2017, variable=:US, value=1),
        (year=2017, variable=:EU, value=2),
        (year=2018, variable=:US, value=3),
        (year=2018, variable=:EU, value=4),
    ]
    @test length(piped) == 4
    @test eltype(piped) == NamedTuple{(:year, :variable, :value), Tuple{Int, Symbol, Int}}
    for (idx, row) in enumerate(expected_rows)
        @test piped[idx] == row
    end

    # Direct form must agree with the pipe form.
    direct = @pivot_longer(wide, :US, :EU) |> collect
    @test direct == piped

    # The result can be materialised as a DataFrame.
    as_frame = wide |> @pivot_longer(:US, :EU) |> DataFrame
    @test as_frame isa DataFrame
    @test size(as_frame) == (4, 3)
    @test names(as_frame) == ["year", "variable", "value"]

    # Both output column names overridden (pipe form).
    renamed = wide |> @pivot_longer(:US, :EU, names_to=:country, values_to=:sales) |> collect
    @test length(renamed) == 4
    @test fieldnames(eltype(renamed)) == (:year, :country, :sales)
    @test renamed[1] == (year=2017, country=:US, sales=1)
    @test renamed[4] == (year=2018, country=:EU, sales=4)

    # Both output column names overridden (direct form).
    renamed_direct = @pivot_longer(wide, :US, :EU, names_to=:country, values_to=:sales) |> collect
    @test renamed_direct == renamed

    # Overriding only `names_to` keeps the default `:value` column.
    names_only = wide |> @pivot_longer(:US, :EU, names_to=:country) |> collect
    @test fieldnames(eltype(names_only)) == (:year, :country, :value)

    # Overriding only `values_to` keeps the default `:variable` column.
    values_only = wide |> @pivot_longer(:US, :EU, values_to=:amount) |> collect
    @test fieldnames(eltype(values_only)) == (:year, :variable, :amount)
end

@testitem "@pivot_longer selector syntax" begin
    using DataFrames

    weekly = DataFrame(year=[2017,2018], wk1=[1,3], wk2=[2,4], total=[10,20])
    yearly = DataFrame(sales_2017=[1,2], cost_2017=[3,4], sales_2018=[5,6])

    # `startswith` picks wk1 and wk2; year and total are carried through.
    by_prefix = weekly |> @pivot_longer(startswith("wk")) |> collect
    @test length(by_prefix) == 4
    @test fieldnames(eltype(by_prefix)) == (:year, :total, :variable, :value)
    @test by_prefix[1] == (year=2017, total=10, variable=:wk1, value=1)
    @test by_prefix[2] == (year=2017, total=10, variable=:wk2, value=2)

    # `endswith` picks the two *_2017 columns.
    by_suffix = yearly |> @pivot_longer(endswith("2017")) |> collect
    @test length(by_suffix) == 4
    @test fieldnames(eltype(by_suffix)) == (:sales_2018, :variable, :value)

    # `occursin` picks the two sales_* columns.
    by_substring = yearly |> @pivot_longer(occursin("sales")) |> collect
    @test length(by_substring) == 4
    @test fieldnames(eltype(by_substring)) == (:cost_2017, :variable, :value)

    # Naming the columns explicitly gives the same result as the prefix match.
    by_name = weekly |> @pivot_longer(:wk1, :wk2) |> collect
    @test by_name == by_prefix

    # A predicate followed by an exclusion by name leaves only wk1.
    prefix_minus_one = weekly |> @pivot_longer(startswith("wk"), -:wk2) |> collect
    @test length(prefix_minus_one) == 2
    @test all(row.variable == :wk1 for row in prefix_minus_one)

    # A lone negated predicate starts from all columns, so year and total are pivoted.
    all_but_prefix = weekly |> @pivot_longer(!(startswith("wk"))) |> collect
    @test length(all_but_prefix) == 4  # 2 non-wk columns × 2 rows
    @test fieldnames(eltype(all_but_prefix)) == (:wk1, :wk2, :variable, :value)
    @test all_but_prefix[1].variable == :year

    # Direct form accepts predicates as well.
    direct_prefix = @pivot_longer(weekly, startswith("wk")) |> collect
    @test direct_prefix == by_prefix
end

# NOTE(review): unlike the neighbouring `@testitem` blocks, this is a plain
# `@testset` with no `using` of its own, so `DataFrame` and `DataValue` must
# already be in scope wherever this file is included — confirm.
@testset "@pivot_wider operator" begin
    tall = DataFrame(
        year = [2017, 2017, 2018, 2018],
        country = [:US, :EU, :US, :EU],
        value = [1, 2, 3, 4]
    )

    # Pipe form: one output row per year, one column per country.
    widened = tall |> @pivot_wider(:country, :value) |> collect
    @test length(widened) == 2
    @test fieldnames(eltype(widened)) == (:year, :US, :EU)
    for (idx, (yr, us, eu)) in enumerate([(2017, 1, 2), (2018, 3, 4)])
        @test widened[idx].year == yr
        @test widened[idx].US == DataValue(us)
        @test widened[idx].EU == DataValue(eu)
    end

    # Direct form must agree with the pipe form.
    widened_direct = @pivot_wider(tall, :country, :value) |> collect
    @test widened_direct == widened

    # The result can be materialised as a DataFrame.
    as_frame = tall |> @pivot_wider(:country, :value) |> DataFrame
    @test as_frame isa DataFrame
    @test size(as_frame) == (2, 3)
    @test names(as_frame) == ["year", "US", "EU"]
end
Loading