Erlang alternative to f# sequence

Asked 15/5, 2015 at 17:57 Answered 17/5, 2015 at 8:6

Is there an alternative for F# "seq" construct in Erlang? For example, in F# I can write an O(1) memory integrate function

let integrate x1 x2 dx f =
    let N = int (abs (x2-x1)/dx)
    let sum = seq { for i in 0..N do yield dx*f(x1 + dx * (double i)) }
                |>  Seq.sum
    if x2>x1 then sum else -sum

In Erlang, I have an implementation which uses lists, and therefore has O(n) memory requirement which is unacceptable for such simple function,

create(Dx, N)->[0| create(Dx, N,[])].

create(Dx, 0, Init)->Init;
create(Dx, N, Init)->create(Dx,N-1, [Dx*N |Init]).

integral(X1,X2,Dx, F) ->
    N=trunc((X2-X1)/Dx),
    Points = create(Dx,N),      
    Vals = lists:map(fun(X)->F(X)*Dx end, Points),
    lists:sum(Vals).

Nickeliferous answered 15/5, 2015 at 17:57 Comment(0)

Disclaimer: the following is written under assumption that Erlang disallows mutation completely, of which I'm not sure, because I don't know Erlang well enough.

Seq is internally mutation-based. It maintains "current state" and mutates it on every iteration. So that when you do one iteration, you get the "next value", but you also get a side effect, which is that the enumerator's internal state has changed, and when you do next iteration, you will get a different "next value", and so on. This is usually nicely covered with functional-looking comprehensions, but if you were to ever work with IEnumerator directly, you will see the non-purity with naked eye.

Another way to think about it is, given a "sequence", you are getting two results: "next value" and "rest of sequence", and then "rest of sequence" becomes your new "sequence", and you can repeat the process. (and the original "sequence" is forever gone)

This line of thought can be directly expressed in F#:

type MySeq<'a> = MySeq of (unit -> ('a * MySeq<'a>))

Meaning: "a lazy sequence is a function that, when applied, returns its head and tail, where tail is another lazy sequence". MySeq of is included to keep the type from becoming infinite.
(sorry, I'll use F#, I don't know Erlang well enough; I'm sure you can translate)

But then, seeing how sequences are usually finite, the whole thing should be optional:

type MySeq<'a> = MySeq of (unit -> ('a * MySeq<'a>) option)

Given this definition, you can trivially make some constructors:

  module MySeq =
    let empty = MySeq <| fun () -> None
    let cons a rest = MySeq <| fun () -> Some (a, rest)
    let singleton a = cons a empty
    let rec repeat n a =
        if n <= 0 then empty
        else MySeq <| fun () -> Some (a, (repeat (n-1) a))
    let rec infinite a = MySeq <| fun() -> Some (a, infinite a)
    let rec ofList list =
        match list with
        | [] -> empty
        | x :: xs -> MySeq <| fun () -> Some (x, ofList xs)

Map and fold are also trivial:

let rec map f (MySeq s) = MySeq <| fun () ->
    match s() with
    | None -> None
    | Some (a, rest) -> Some (f a, map f rest)

let rec fold f acc0 (MySeq s) =
    match s() with
    | None -> acc0
    | Some (a, rest) -> fold f (f acc0 a) rest

And from fold you can build everything, which is not a lazy sequence itself. But to build lazy sequences, you need a "rolling fold" (sometimes called "scan"):

let rec scan f state0 (MySeq s) = MySeq <| fun() ->
    match s() with
    | None -> None
    | Some (a, rest) ->
        let newState = f state0 a
        Some (newState, scan f newState rest)

// reformulate map in terms of scan:
let map f = scan (fun _ a -> f a) Unchecked.defaultof<_>

Here's how to use it:

let emptySeq = MySeq.empty
let numbers = MySeq.ofList [1; 2; 3; 4]
let doubles = MySeq.map ((*) 2) numbers  // [2; 4; 6; 8]
let infiniteNumbers = 
    MySeq.infinite () 
    |> MySeq.scan (fun prev _ -> prev+1) 0
let infiniteDoubles = MySeq.map ((*) 2) infiniteNumbers

And in conclusion, I'd like to add that mutation-based solution will nearly always be more performant (all things being equal), at least a little. Even if you immediately throw away old state as you calculate new, the memory still needs to be reclaimed, which is itself a performance hit. The benefits of immutability do not include performance fine-tuning.

Update:
Here's my crack at Erlang version. Keep in mind that this is the very first code that I ever wrote in Erlang. As such, I'm sure there are better ways to encode this, and that there must be a library for this already available.

-module (seq).
-export ([empty/0, singleton/1, infinite/1, repeat/2, fold/3, scan/3, map/2, count/1]).

empty() -> empty.
singleton(A) -> fun() -> {A, empty} end.
infinite(A) -> fun() -> {A, infinite(A)} end.

repeat(0,_) -> empty;
repeat(N,A) -> fun() -> {A, repeat(N-1,A)} end.

fold(_, S0, empty) -> S0;
fold(F, S0, Seq) ->
  {Current, Rest} = Seq(),
  S1 = F(S0, Current),
  fold(F, S1, Rest).

scan(_, _, empty) -> empty;
scan(F, S0, Seq) -> fun() ->
  {Current, Rest} = Seq(),
  S1 = F(S0, Current),
  {S1, scan(F, S1, Rest)}
end.

map(F, Seq) -> scan( fun(_,A) -> F(A) end, 0, Seq ).
count(Seq) -> fold( fun(C,_) -> C+1 end, 0, Seq ).

Usage:

1> c(seq).
{ok,seq}
2> FiveTwos = seq:repeat(5,2).
#Fun<seq.2.133838528>
3> Doubles = seq:map( fun(A) -> A*2 end, FiveTwos ).
#Fun<seq.3.133838528>
5> seq:fold( fun(S,A) -> S+A end, 0, Doubles ).
20
6> seq:fold( fun(S,A) -> S+A end, 0, FiveTwos ).
10
11> seq:count( FiveTwos ).
5

Simaroubaceous answered 15/5, 2015 at 20:24 Comment(3)

Thank you for help, but your code doesn't compile under VS 2012. In "Some (a, repeat (n-1) a)" or "Some (a, infinite a)" it complains:

Type mismatch. Expecting a     'a     but given a     unit -> ('b * 'a) option     The resulting type would be infinite when unifying ''a' and 'unit -> ('b * 'a) option'

Unfortunately I know Erlang for circa 3 hours so I have no idea how to translate most of this, especially when I cannot watch it running. – Davao 15/5, 2015 at 21:50

@JacekGajek, you must forgive me for non-compiling code, I was typing it on my phone at the time. I thought the idea should be clear enough by itself, and only included code for illustration. I have replaced all examples with correct, verified code. – Simaroubaceous 15/5, 2015 at 23:54

@JacekGajek I also produced an Erlang version just now, feel free to use it for reference. – Simaroubaceous 16/5, 2015 at 1:58

This is not tested, but is one way of doing it.

The idea is to turn the list into a process which yields the next value when asked. You can easily generalize the idea if you need to do so.

Alternatively, you can write an unfold which can then unfold the list a bit at a time and use this as input to the generic processor.

Another way is to implement lazy streams, based on the idea that any Expr can be delayed by fun () -> Expr end perhaps best written as -define(DELAY(X), fun() -> X end). as a macro and then used together with -define(FORCE(X), X()).

-module(z).

-export([integral/4]).

create(Dx, N) ->
  spawn_link(fun() -> create_loop(Dx, N) end).

create_loop(Dx, 0, Acc)->
    receive
        {grab, Target} -> Target ! done,
        ok
    after 5000 ->
       exit(timeout_error)
    end;
create_loop(Dx, N, Acc) ->
    receive
        {grab, Target} ->
            Target ! {next, Dx*N},
            create_loop(Dx, N-1)
    after 5000 ->
        exit(timeout_error)
    end.

next(Pid) ->
    Pid ! {grab, self()},
    receive
        {next, V} -> {next, V};
        done -> done
    after 5000 ->
        exit(timeout_error)
    end.

sum(F, Points, Acc) ->
    case next(Points) of
        {next, V} -> sum(F, Points, Acc + F(V));
        done -> Acc
    end.

integral(X1, X2, Dx, F) ->
    N = trunc( (X2 - X1) / Dx),
    Points = create(Dx, N),
    sum(fun(X) -> F(X) * Dx end, Points, 0).

The solution based on DELAY/FORCE is something like:

-module(z).
-define(DELAY(X), fun() -> X end).
-define(FORCE(X), X()).

create(Dx, N) ->
    [0 | ?DELAY(create_loop(Dx, N))].

create_loop(Dx, N) ->
    [Dx*N | ?DELAY(create_loop(Dx, N-1)]; % This is an abuse of improper lists
create_loop(_, 0) -> [].

map(F, []) -> [];
map(F, [V | Thunk]) ->
    [F(V) | ?DELAY(map(F, ?FORCE(Thunk)))].

sum([], Acc) -> Acc;
sum([V | Thunk], Acc) ->
    sum(?FORCE(Thunk), V + Acc).

integral(X1,X2,Dx, F) ->
    N = trunc((X2-X1) / Dx),
    Points = create(Dx, N),
    Vals = map(fun(X) -> F(X)*Dx end, Points),
    sum(Vals).

But not tested.

Diane answered 15/5, 2015 at 18:50 Comment(2)

Way too complicated... Multithreading is one step further. For know I'd prefer to have the simpliest possible solution. – Davao 15/5, 2015 at 21:32

If you clean it up a bit, it is not complicated. Most of the structure can be hoisted out into a library. Also, it has the added value that it can produce values in parallel with your computation, which is good for some problems. However, I added the idea of DELAY/FORCE to make explicit stream.s – Diane 16/5, 2015 at 22:54

The most popular way to create memory stable processing is to define tail recursive function. For example:

integrate_rec(X1, X2, DX, F) when X2 >= X1 ->
    integrate_rec(X1, X2, DX, F, X1, 0, 1);
integrate_rec(X1, X2, DX, F) when X2 < X1 ->
    integrate_rec(X2, X1, DX, F, X2, 0, -1).

integrate_rec(X1, X2, _DX, _F, X, Sum, Sign) when X >= X2 -> 
    Sign*Sum;
integrate_rec(X1, X2, DX, F, X, Sum, Sign) -> 
    integrate_rec(X1, X2, DX, F, X + DX, Sum + DX*F(X), Sign).

But it doesn't look clear... I had the same problem once and have made short helper for function that allows you to iterate without lists:

integrate_for(X1, X2, DX, F) ->
    Sign = if X2 < X1 -> -1; true -> 1 end,
    Sum = (for(0, {X1, X2, Sign*DX}))(
            fun (X, Sum) -> 
                Sum + DX*F(X)
            end),
    Sign*Sum.

Unfortunately, it's a little bit slower than direct recursion:

benchmark() ->
    X1 = 0, 
    X2 = math:pi(),
    DX = 0.0000001,
    F = fun math:sin/1,
    IntegrateFuns = [fun integrate_rec/4, fun integrate_for/4],
    Args = [X1, X2, DX, F],
    [timer:tc(IntegrateFun, Args) || IntegrateFun <- IntegrateFuns].

> [{3032398,2.000000000571214},{4069549,2.000000000571214}]

So it's ~3.03s to ~4.07s - not that bad.

Headman answered 16/5, 2015 at 1:32 Comment(0)

I like concise expression, it is why I propose this forth solution (defined in the shell, but it should be easy to adapt to a module):

1> Int = fun Int(X,N,N,D,F,V,LF) -> (V + (F(N*D+X)+LF)*D)/2; Int(X,C,N,D,F,V,LF) -> NF = F(X+C*D), Int(X,C+1,N,D,F,V+(NF+LF)*D,NF) end.
#Fun<erl_eval.27.90072148>
2> Integral = fun(X1,X2,Dx,F) -> S = abs(X2-X1) / (X2-X1), N = round(S*(X2-X1)/Dx), Int(X1,1,N,S*Dx,F,0,F(X1)) end.
#Fun<erl_eval.4.90072148>
3> F = fun(X) -> 2*X end.
#Fun<erl_eval.6.90072148>
4> Integral(0,2,0.00001,F).
4.000000000000002
5> Integral(2,0,0.00001,F).
-3.9999999999999996
6>

Int does the evaluation looping recursively, Integral prepares the parameters before calling Int.

Epoch answered 17/5, 2015 at 8:6 Comment(0)

Recommended topics

Hot tags