-
Notifications
You must be signed in to change notification settings - Fork 56
Another attempt at an astable flag #298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
a8701c8
9b997a6
d639560
b77e8ca
3cdf0d5
b878fbb
2344a2e
6557def
6002def
08a1c4b
581b2cf
7cc8947
0eca67d
a4ab9a6
ab9bae4
495f08a
01cb5e7
01fb3b7
915191c
a331fc2
2ce4d9e
57b4051
da7674d
285e3ac
713eaf0
4e01c4a
09c692a
ae26da8
a7fd1a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -350,6 +350,99 @@ macro passmissing(args...) | |
| throw(ArgumentError("@passmissing only works inside DataFramesMeta macros.")) | ||
| end | ||
|
|
||
| """ | ||
| @astable(args...) | ||
pdeffebach marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| Return a `NamedTuple` from a transformation inside DataFramesMeta.jl macros. | ||
|
|
||
| `@astable` acts on a single block. It works through all top-level expressions | ||
| and collects all such expressions of the form `:y = x`, i.e. assignments to a | ||
pdeffebach marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| `Symbol`, which is a syntax error outside of the macro. At the end of the | ||
pdeffebach marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| expression, all assignments are collected into a `NamedTuple` to be used | ||
| with the `AsTable` destination in the DataFrames.jl transformation | ||
| mini-language. | ||
|
|
||
| Concretely, the expressions | ||
|
|
||
| ``` | ||
| df = DataFrame(a = 1) | ||
|
|
||
| @rtransform df @astable begin | ||
| :x = 1 | ||
| y = 50 | ||
| :z = :x + y + :a | ||
| end | ||
| ``` | ||
|
|
||
| become the pair | ||
pdeffebach marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| ``` | ||
| function f(a) | ||
| x_t = 1 | ||
pdeffebach marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| y = 50 | ||
| z_t = x_t + y + a | ||
|
|
||
| (; x = x_t, z = z_t) | ||
| end | ||
|
|
||
| transform(df, [:a] => ByRow(f) => AsTable) | ||
| ``` | ||
|
|
||
| `@astable` is useful when performing intermediate calculations | ||
| and storing their results in new columns. For example, the following fails. | ||
|
|
||
| ``` | ||
| @rtransform df begin | ||
| :new_col_1 = :x + :y | ||
| :new_col_2 = :new_col_1 + :z | ||
| end | ||
| ``` | ||
|
|
||
| This is because DataFrames.jl does not guarantee sequential evaluation of | ||
| transformations. `@astable` solves this problem: | ||
|
||
|
|
||
| ``` | ||
| @rtransform df @astable begin | ||
| :new_col_1 = :x + :y | ||
| :new_col_2 = :new_col_1 + :z | ||
| end | ||
| ``` | ||
|
|
||
| ### Examples | ||
|
|
||
| ``` | ||
| julia> df = DataFrame(a = [1, 2, 3], b = [4, 5, 6]); | ||
|
|
||
| julia> d = @rtransform df @astable begin | ||
| :x = 1 | ||
| y = 5 | ||
| :z = :x + y | ||
| end | ||
| 3×4 DataFrame | ||
| Row │ a b x z | ||
| │ Int64 Int64 Int64 Int64 | ||
| ─────┼──────────────────────────── | ||
| 1 │ 1 4 1 6 | ||
| 2 │ 2 5 1 6 | ||
| 3 │ 3 6 1 6 | ||
|
|
||
| julia> df = DataFrame(a = [1, 1, 2, 2], b = [5, 6, 70, 80]); | ||
|
|
||
| julia> @by df :a @astable begin | ||
| $(DOLLAR)"Mean of b" = mean(:b) | ||
| $(DOLLAR)"Standard deviation of b" = std(:b) | ||
| end | ||
|
Comment on lines
+429
to
+472
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This example can be achieved without `@astable`. Also, I wouldn't use long column names with spaces in them: better illustrate a single feature at a time.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. great. changed. |
||
| 2×3 DataFrame | ||
| Row │ a Mean of b Standard deviation of b | ||
| │ Int64 Float64 Float64 | ||
| ─────┼─────────────────────────────────────────── | ||
| 1 │ 1 5.5 0.707107 | ||
| 2 │ 2 75.0 7.07107 | ||
| ``` | ||
|
|
||
| """ | ||
| macro astable(args...) | ||
| # Expanding `@astable` directly always throws; per the message below it is | ||
| # only meaningful when it appears inside another DataFramesMeta macro. | ||
| throw(ArgumentError("@astable only works inside DataFramesMeta macros.")) | ||
| end | ||
|
|
||
| ############################################################################## | ||
| ## | ||
| ## @with | ||
|
|
@@ -1546,17 +1639,6 @@ function combine_helper(x, args...; deprecation_warning = false) | |
|
|
||
| exprs, outer_flags = create_args_vector(args...) | ||
|
|
||
| fe = first(exprs) | ||
| if length(exprs) == 1 && | ||
| get_column_expr(fe) === nothing && | ||
| !(fe.head == :(=) || fe.head == :kw) | ||
|
|
||
| @warn "Returning a Table object from @by and @combine now requires `$(DOLLAR)AsTable` on the LHS." | ||
|
|
||
| lhs = Expr(:$, :AsTable) | ||
| exprs = ((:($lhs = $fe)),) | ||
| end | ||
|
|
||
| t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) | ||
|
|
||
| quote | ||
|
|
@@ -1666,16 +1748,6 @@ end | |
| function by_helper(x, what, args...) | ||
| # Only allow one argument when returning a Table object | ||
| exprs, outer_flags = create_args_vector(args...) | ||
| fe = first(exprs) | ||
| if length(exprs) == 1 && | ||
| get_column_expr(fe) === nothing && | ||
| !(fe.head == :(=) || fe.head == :kw) | ||
|
|
||
| @warn "Returning a Table object from @by and @combine now requires `\$AsTable` on the LHS." | ||
|
|
||
| lhs = Expr(:$, :AsTable) | ||
| exprs = ((:($lhs = $fe)),) | ||
| end | ||
|
|
||
| t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| # Pick the value that stands in for column `col` in the generated function body. | ||
| # Returns `lhs_assignments[col]` when `col` is already a key of `lhs_assignments`; | ||
| # otherwise returns the result of `addkey!(inputs_to_function, col)`. | ||
| # NOTE(review): `addkey!` is defined elsewhere -- presumably it registers `col` | ||
| # in the dict and returns the generated symbol; confirm. | ||
| function conditionally_add_symbols!(inputs_to_function, lhs_assignments, col) | ||
pdeffebach marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # if it's already been assigned at top-level, | ||
| # don't add it to the inputs | ||
| if haskey(lhs_assignments, col) | ||
| return lhs_assignments[col] | ||
| else | ||
| return addkey!(inputs_to_function, col) | ||
| end | ||
pdeffebach marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| end | ||
|
|
||
| # Fallback method: a value that is neither a `QuoteNode` nor an `Expr` is | ||
| # returned unchanged. | ||
| replace_syms_astable!(inputs_to_function, lhs_assignments, x) = x | ||
| # A `QuoteNode` is handed to `conditionally_add_symbols!` as the column key, | ||
| # and whatever that returns replaces the node. | ||
| replace_syms_astable!(inputs_to_function, lhs_assignments, q::QuoteNode) = | ||
| conditionally_add_symbols!(inputs_to_function, lhs_assignments, q) | ||
|
|
||
| # `Expr` method: rewrites column references found in `e`. | ||
| # Branches are tried in order: | ||
| #   1. `onearg(e, :^)` -> return `e.args[2]` without further processing | ||
| #      (NOTE(review): presumably the `^(...)` escape form; `onearg` is defined elsewhere), | ||
| #   2. `get_column_expr(e)` is not `nothing` -> treat that result as a column key, | ||
| #   3. head is `:.` -> delegate to `replace_dotted_astable!`, | ||
| #   4. otherwise -> apply this function to the sub-expressions via `mapexpr`. | ||
| function replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) | ||
| if onearg(e, :^) | ||
| return e.args[2] | ||
| end | ||
|
|
||
||
| col = get_column_expr(e) | ||
| if col !== nothing | ||
| return conditionally_add_symbols!(inputs_to_function, lhs_assignments, col) | ||
| elseif e.head == :. | ||
| return replace_dotted_astable!(inputs_to_function, lhs_assignments, e) | ||
| else | ||
| return mapexpr(x -> replace_syms_astable!(inputs_to_function, lhs_assignments, x), e) | ||
| end | ||
| end | ||
|
|
||
| # Variant of `replace_syms_astable!` used for the second argument of a `.` | ||
| # expression: only `Expr` values are rewritten, everything else (including a | ||
| # bare `QuoteNode`) is returned untouched. | ||
| protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e) = e | ||
| # Recurse with the astable-specific replacer so that both dictionaries are | ||
| # threaded through; the previous call to `replace_syms!` did not match this | ||
| # three-argument signature. | ||
| protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e::Expr) = | ||
| replace_syms_astable!(inputs_to_function, lhs_assignments, e) | ||
|
|
||
| # Rebuild a `.` expression after rewriting its two arguments. | ||
| # `e.args[1]` goes through `replace_syms_astable!`; `e.args[2]` goes through | ||
| # `protect_replace_syms_astable!`, which leaves non-`Expr` values as they are. | ||
| # Returns a new `Expr(:., ...)`; `e` itself is not modified here. | ||
| function replace_dotted_astable!(inputs_to_function, lhs_assignments, e) | ||
| x_new = replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[1]) | ||
| y_new = protect_replace_syms_astable!(inputs_to_function, lhs_assignments, e.args[2]) | ||
| Expr(:., x_new, y_new) | ||
| end | ||
|
|
||
| # Anything that is not an `Expr` cannot be a column assignment. | ||
| is_column_assigment(ex) = false | ||
| # True when `ex` is an `=` expression whose left-hand side is recognised as a | ||
| # column by `get_column_expr` (i.e. that call does not return `nothing`). | ||
| function is_column_assigment(ex::Expr) | ||
| ex.head == :(=) && (get_column_expr(ex.args[1]) !== nothing) | ||
| end | ||
|
|
||
| # Taken from MacroTools.jl | ||
| # No docstring so assumed unstable | ||
| # Returns `ex` itself when it is already a `:block` expression; otherwise | ||
| # wraps it as `:($ex;)`. | ||
| block(ex) = isexpr(ex, :block) ? ex : :($ex;) | ||
|
|
||
| # Build the `source` and `fun` expressions for an `@astable` block. | ||
| # | ||
| # Arguments | ||
| #   ex        : the expression passed to `@astable`; it is flattened and wrapped | ||
| #               in a block so its top-level statements can be visited one by one. | ||
| #   exprflags : flag collection; only `exprflags[BYROW_SYM][]` is read here. | ||
| # | ||
| # Returns `(source, fun)` where | ||
| #   source : a call to `DataFramesMeta.make_source_concrete` on the vector of | ||
| #            keys collected in `inputs_to_function`, | ||
| #   fun    : an anonymous-function expression taking one argument per collected | ||
| #            input, running the rewritten statements, and ending with a | ||
| #            `(; Symbol(k) => v, ...)` tuple built from `lhs_assignments`; | ||
| #            wrapped in `ByRow(...)` when the by-row flag is set. | ||
| function get_source_fun_astable(ex; exprflags = deepcopy(DEFAULT_FLAGS)) | ||
| inputs_to_function = Dict{Any, Symbol}() | ||
| lhs_assignments = OrderedCollections.OrderedDict{Any, Symbol}() | ||
|
|
||
||
| # Make sure all top-level assignments are | ||
| # in the args vector | ||
| ex = block(MacroTools.flatten(ex)) | ||
| exprs = map(ex.args) do arg | ||
| if is_column_assigment(arg) | ||
| lhs = get_column_expr(arg.args[1]) | ||
| rhs = arg.args[2] | ||
| # The right-hand side is rewritten BEFORE the left-hand side is | ||
| # registered, so a column read on the right of its own assignment | ||
| # is still looked up with the state from the preceding statements. | ||
| new_ex = replace_syms_astable!(inputs_to_function, lhs_assignments, rhs) | ||
| if haskey(inputs_to_function, lhs) | ||
| # The column is already a function input: reuse its symbol | ||
| # and record it as an output as well. | ||
| new_lhs = inputs_to_function[lhs] | ||
| lhs_assignments[lhs] = new_lhs | ||
| else | ||
| new_lhs = addkey!(lhs_assignments, lhs) | ||
pdeffebach marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| end | ||
|
|
||
||
| Expr(:(=), new_lhs, new_ex) | ||
| else | ||
| replace_syms_astable!(inputs_to_function, lhs_assignments, arg) | ||
| end | ||
| end | ||
| # `keys` and `values` are taken from the same, unmodified Dict, so the | ||
| # source columns and the function arguments line up positionally. | ||
| source = :(DataFramesMeta.make_source_concrete($(Expr(:vect, keys(inputs_to_function)...)))) | ||
|
|
||
||
| inputargs = Expr(:tuple, values(inputs_to_function)...) | ||
| nt_iterator = (:(Symbol($k) => $v) for (k, v) in lhs_assignments) | ||
| nt_expr = Expr(:tuple, Expr(:parameters, nt_iterator...)) | ||
| body = Expr(:block, Expr(:block, exprs...), nt_expr) | ||
|
|
||
||
| fun = quote | ||
| $inputargs -> begin | ||
| $body | ||
| end | ||
| end | ||
|
|
||
||
| # TODO: Add passmissing support by | ||
| # checking if any input arguments missing, | ||
| # and if-so, making a named tuple with | ||
| # missing values | ||
| if exprflags[BYROW_SYM][] | ||
| fun = :(ByRow($fun)) | ||
| end | ||
|
|
||
||
| return source, fun | ||
| end | ||
Uh oh!
There was an error while loading. Please reload this page.