These are chat archives for synrc/n2o

16th
Feb 2017
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 00:07
ok
did you try running my version ?
on my machine, it is really faster
nearly twice as fast
list_to_binary is very good at converting list at the end
I've had a version with full binary before, but it was slower as well, so I didn't keep it
Andy
@m-2k
Feb 16 2017 00:25
{result,{5100,21783}}
5100ms - my version
21783ms - your version
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 00:25
waow
Andy
@m-2k
Feb 16 2017 00:26
%% Builds a benchmark payload: N concatenated "<div class='sx'>...</div>"
%% fragments, each holding two 10-byte random chunks separated by the two
%% literal characters backslash and "n". Returns a single binary.
%% crypto:rand_bytes/1 was removed in OTP 20, so the random chunks are taken
%% from crypto:strong_rand_bytes/1 instead.
t_data(html, N) ->
    list_to_binary([
        ["<div class='sx'>", crypto:strong_rand_bytes(10), "\\n",
         crypto:strong_rand_bytes(10), "</div>"]
        || _ <- lists:seq(1, N)]).

%% Benchmarks wf_convert:html_encode/1 and wf_convert:html_encode2/1 on one
%% large payload made of X fragments.
%% Returns {result, {Ms1, Ms2}} with the elapsed time of each encoder in whole
%% milliseconds.
test4(X) ->
    %% Generate the payload once so both encoders are timed on identical input.
    Data = t_data(html, X),
    Timings = [begin
                   {Micros, _} = timer:tc(wf_convert, F, [Data]),
                   %% timer:tc/3 reports microseconds; convert to milliseconds.
                   Micros div 1000
               end || F <- [html_encode, html_encode2]],
    {result, list_to_tuple(Timings)}.
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 00:26
we are not running the same tests for sure :-)
I try
Andy
@m-2k
Feb 16 2017 00:32
and? i wait for you
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 00:44
We are not measuring the same thing at all
you are measuring the encoding of a very big binary
I'm measuring thousands of encodings of small binaries (my use case)
I will modify my tests to add some random as you did
Andy
@m-2k
Feb 16 2017 00:49
you can run my test on their platform?
Andy
@m-2k
Feb 16 2017 00:58
yep, now your version is faster {result,{2094,1539}}
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 00:58
%% Runs N encodings of a freshly generated one-fragment payload through each of
%% w_convert:html_encode/1, html_encode2/1 and html_encode3/1, and prints the
%% runtime and wall-clock deltas reported by statistics/1 for every encoder.
test_seb(N) ->
    Encoders = [{"html_encode=~p (~p) ms~n",  fun w_convert:html_encode/1},
                {"html_encode2=~p (~p) ms~n", fun w_convert:html_encode2/1},
                {"html_encode3=~p (~p) ms~n", fun w_convert:html_encode3/1}],
    lists:foreach(
        fun({Format, Encode}) ->
            %% The first pair of calls only marks the starting point: the
            %% second element returned later is the time since these calls.
            statistics(runtime),
            statistics(wall_clock),
            for(1, N, fun() -> Encode(t_data(html, 1)) end),
            {_, Runtime} = statistics(runtime),
            {_, WallClock} = statistics(wall_clock),
            io:format(Format, [Runtime, WallClock])
        end,
        Encoders).
Andy
@m-2k
Feb 16 2017 00:58
%% Benchmarks wf_convert:html_encode/1 and html_encode2/1 over X small payloads
%% (via ?MODULE:test5_fun/2).
%% Returns {result, {Ms1, Ms2}} with each encoder's elapsed time in whole
%% milliseconds.
test5(X) ->
    %% Build the inputs once, before timing starts, so every encoder is
    %% measured on exactly the same random data.
    Inputs = [t_data(html, 1) || _ <- lists:seq(1, X)],
    Timings = [begin
                   {Micros, _} = timer:tc(?MODULE, test5_fun, [F, Inputs]),
                   %% timer:tc/3 reports microseconds.
                   Micros div 1000
               end || F <- [html_encode, html_encode2]],
    {result, list_to_tuple(Timings)}.

%% Applies the wf_convert function named F to every element of List and
%% returns the results in the same order.
test5_fun(F, List) ->
    lists:map(fun(Item) -> wf_convert:F(Item) end, List).
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 00:58
and I've got a new version that is quicker
html_encode3
better list construction
here it is
%% HTML-escapes Value and returns the result as a binary.
%% Atoms and numbers are first converted with wf:to_binary/1; binaries are
%% escaped byte by byte ('>', '<', '"', ''', '&' become entities and a newline
%% becomes "<br>").
html_encode3(Value) when is_atom(Value); is_number(Value) ->
    html_encode3(wf:to_binary(Value));
html_encode3(Value) when is_binary(Value) ->
    html_encode3(Value, []).

%% Tail-recursive worker: output pieces are pushed onto Reversed (newest
%% first) and put back in order once the input is exhausted.
html_encode3(<<>>, Reversed) ->
    list_to_binary(lists:reverse(Reversed));
html_encode3(<<Byte, Tail/binary>>, Reversed) ->
    Piece = case Byte of
        $>  -> "&gt;";
        $<  -> "&lt;";
        $"  -> "&quot;";
        $'  -> "&#39;";
        $&  -> "&amp;";
        $\n -> "<br>";
        _   -> Byte
    end,
    html_encode3(Tail, [Piece | Reversed]).
a million run on a slow server
w_test:test_seb(1000000).
html_encode=25940 (26597) ms
html_encode2=21760 (22346) ms
html_encode3=17500 (17997) ms
Andy
@m-2k
Feb 16 2017 01:04
yep, erlang guidelines do not lie {result,{2074,1462,1321}}
Andy
@m-2k
Feb 16 2017 01:09
okay, challenge accepted, I wrote a more rapid implementation
{result,{2071,1390,1277,1198}}
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 01:10
ah good !
Andy
@m-2k
Feb 16 2017 01:10
%% HTML-escapes a binary or an iolist using the default `normal` mode.
html_encode4(Data) -> html_encode4(Data, normal).

%% HTML-escapes Data. A binary input yields a binary; a list input is
%% flattened with iolist_to_binary/1, escaped, and returned as a list.
%% In `whites` mode spaces and tabs are additionally turned into &nbsp; runs.
html_encode4(Bin, Mode) when is_binary(Bin) ->
    Pieces = [html_encode_char4(Byte, Mode) || <<Byte>> <= Bin],
    iolist_to_binary(Pieces);
html_encode4(Chars, Mode) when is_list(Chars) ->
    Bin = iolist_to_binary(Chars),
    binary_to_list(html_encode4(Bin, Mode)).

%% Maps one byte to its replacement binary, or returns the byte unchanged
%% when it needs no escaping.
html_encode_char4(Char, Mode) ->
    case Char of
        $\s when Mode =:= whites -> <<"&nbsp;">>;
        $\t when Mode =:= whites -> <<"&nbsp; &nbsp; &nbsp;">>;
        $\n -> <<"<br>">>;
        $\\ -> <<"&#92;">>;
        $<  -> <<"&lt;">>;
        $>  -> <<"&gt;">>;
        $"  -> <<"&quot;">>;
        $'  -> <<"&#39;">>;
        $&  -> <<"&amp;">>;
        _   -> Char
    end.
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 01:11
a typo in <br>>
Andy
@m-2k
Feb 16 2017 01:14
i rollback to binary quotes for moar of moar faster!!!! ARRGGGHH
good night. z-z-Z
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 01:16
yep time for me as well
good night !
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:04
Hi Andy
back with some more results, very interesting
html_encode3 is quicker than html_encode4
coz there are 2 mistakes
 html_encode_char4(C,  _) -> C.
should be
html_encode_char4(C,  _) -> <<C>>.
to be consistent with the version
otherwise you are still constructing mainly a list and not a binary, and that's why it is quicker !
secondly test5(X) is measuring a lot of argument passing instead of directly measuring the time to construct the binaries
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:09
plus the data submitted to the different encoders should be the same to have correct measures
so here is my version
%% Times html_encode3, html_encode4 and html_encode5 (through
%% ?MODULE:test5_fun/2) on the same X randomly generated payloads.
%% Returns {result, {Ms3, Ms4, Ms5}} in whole milliseconds.
test5(X) ->
    Inputs = [t_data(html, 1) || _ <- lists:seq(1, X)],
    Millis = [begin
                  {Micros, _} = timer:tc(?MODULE, test5_fun, [Encoder, Inputs]),
                  %% timer:tc/3 reports microseconds.
                  trunc(Micros / 1000)
              end || Encoder <- [html_encode3, html_encode4, html_encode5]],
    {result, list_to_tuple(Millis)}.

%% Calls the w_convert function named F on every element of List, discards
%% the results and returns ok.
test5_fun(F, List) ->
    lists:foreach(fun(Item) -> w_convert:F(Item) end, List).
which shows the same results as the other one I'm using
%% Builds X one-fragment payloads (random chunk size Rands) and runs each of
%% w_convert:html_encode3/4/5 over all of them, printing the runtime and
%% wall-clock deltas from statistics/1 per encoder.
%% Returns the list of io:format/2 results, one per encoder.
test6(X, Rands) ->
    Inputs = [t_data(html, 1, Rands) || _ <- lists:seq(1, X)],
    [begin
         %% Mark the starting point for the "since last call" counters.
         statistics(runtime),
         statistics(wall_clock),
         lists:foreach(fun(Input) -> w_convert:Encoder(Input) end, Inputs),
         {_, Runtime} = statistics(runtime),
         {_, WallClock} = statistics(wall_clock),
         io:format("~p=~p (~p) ms~n", [Encoder, Runtime, WallClock])
     end || Encoder <- [html_encode3, html_encode4, html_encode5]].
And finally, I've built a final version that is quicker than encode3, and that is encode5
it is especially quicker when the size of the string to encode grows
Andy
@m-2k
Feb 16 2017 11:17
@seb3s you want to show me faster code or what?
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:17
yes
coming
%% HTML-escapes Value and returns a binary.
%% Atoms and numbers go through wf:to_binary/1 first; binaries are escaped
%% byte by byte and the resulting pieces are joined with list_to_binary/1.
html_encode5(Value) when is_atom(Value); is_number(Value) ->
    html_encode5(wf:to_binary(Value));
html_encode5(Value) when is_binary(Value) ->
    list_to_binary(html_encode_aux5(Value)).

%% Body-recursive worker: returns a list whose elements are either the
%% original byte or the replacement string for an escaped byte.
html_encode_aux5(<<>>) ->
    [];
html_encode_aux5(<<Byte, Tail/binary>>) ->
    Piece = case Byte of
        $\n -> "<br>";
        $\\ -> "&#92;";
        $<  -> "&lt;";
        $>  -> "&gt;";
        $"  -> "&quot;";
        $'  -> "&#39;";
        $&  -> "&amp;";
        _   -> Byte
    end,
    [Piece | html_encode_aux5(Tail)].
to test with different sizes
%% Builds a benchmark payload: N concatenated "<div class='sx'>...</div>"
%% fragments, each holding two random chunks of Rands bytes separated by the
%% two literal characters backslash and "n". Returns a single binary.
%% crypto:rand_bytes/1 was removed in OTP 20, so the random chunks are taken
%% from crypto:strong_rand_bytes/1 instead.
t_data(html, N, Rands) ->
    list_to_binary([
        ["<div class='sx'>", crypto:strong_rand_bytes(Rands), "\\n",
         crypto:strong_rand_bytes(Rands), "</div>"]
        || _ <- lists:seq(1, N)]).
and use test6(X, Rands)
some results
w_test:test6(1000000,40).
html_encode3=9800 (10538) ms
html_encode4=12740 (13093) ms
html_encode5=6590 (6714) ms
[ok,ok,ok]
389> w_test:test6(100000,100). 
html_encode3=2060 (2138) ms
html_encode4=2660 (2736) ms
html_encode5=1290 (1330) ms
[ok,ok,ok]
390> w_test:test6(10000,1000). 
html_encode3=570 (581) ms
html_encode4=920 (924) ms
html_encode5=520 (525) ms
[ok,ok,ok]
391> w_test:test6(100000,1000).
html_encode3=18190 (19898) ms
html_encode4=24060 (25000) ms
html_encode5=11270 (11640) ms
[ok,ok,ok]
I like it
Andy
@m-2k
Feb 16 2017 11:22
byte-raping
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:22
what that means ?
Andy
@m-2k
Feb 16 2017 11:23
russian slang
Namdak Tonpa
@5HT
Feb 16 2017 11:23
he try to ressemble russian term "байтойобство" which literally means "byte fucker"
— the person who spend too much time for byte-related optimizations
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:24
aha I see :-)
Namdak Tonpa
@5HT
Feb 16 2017 11:25
so I'm ready to merge html_encode5, show it to me :-)
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:27
It's a few lines above, It is just missing spaces encoding as I don't use that
Andy
@m-2k
Feb 16 2017 11:28
ugh, wait for merging, i want to test it! :smile:
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:28
sure !!!! :-)
Andy
@m-2k
Feb 16 2017 11:41
1000000 steps, small data
wf:test6(1000000).
html_encode=4700 (4895) ms
html_encode2=2580 (2712) ms
html_encode3=1860 (1969) ms
html_encode4=2560 (2806) ms
html_encode5=2240 (2338) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:43
what if you increase the size of the binary
on my machine, encode5 is as fast as encode3 for small binaries but quicker as binaries are getting bigger
so it seems overall better but let's see some figures on your machine
Andy
@m-2k
Feb 16 2017 11:45
1 step, large data
wf:test6(1000000).
html_encode=4800 (5148) ms
html_encode2=11430 (24114) ms
html_encode3=3990 (6040) ms
html_encode4=7130 (10641) ms
html_encode5=5730 (9320) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:46
something like w_test:test6(100000,1000)
Andy
@m-2k
Feb 16 2017 11:46
ok
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:47
on my machine encode5 is twice better in this case
than encode3
all my tests are running on a CPU server, real use case
Intel(R) Xeon(R) CPU E3-1245 v5 @ 3.50GHz
Andy
@m-2k
Feb 16 2017 11:49
wf:test6(10000,1000).
html_encode=2340 (2461) ms
html_encode2=5910 (13071) ms
html_encode3=3290 (5485) ms
html_encode4=2950 (4680) ms
html_encode5=2580 (4220) ms
Namdak Tonpa
@5HT
Feb 16 2017 11:50
(100000,1000)
don't be in hurry
Andy
@m-2k
Feb 16 2017 11:51
я тогда запущу и спать пойд
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 11:59
w_test:test6(100000,1000).
html_encode=22550 (23094) ms
html_encode2=20220 (20918) ms
html_encode3=18340 (20088) ms
html_encode4=25450 (26537) ms
html_encode5=12250 (12611) ms
Andy
@m-2k
Feb 16 2017 12:01
html_encode2 on my laptop eat very many memory
html_encode2 has not been done
now i can tested on Intel(R) Xeon(R) CPU E5-2650L v3 @ 1.80GHz
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:02
yep I think encode2 is not good as it appends to list on the end
bad practice
Andy
@m-2k
Feb 16 2017 12:03
this? html_encode2(<<"\"", Rest/binary>>, Acc) -> html_encode2(Rest, [Acc | "&quot;"]);
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:03
w_test:test6(10000000,1). 
html_encode=17350 (17763) ms
html_encode2=8340 (8456) ms
html_encode3=6110 (7088) ms
html_encode4=9250 (9369) ms
html_encode5=5970 (6179) ms
yep
Andy
@m-2k
Feb 16 2017 12:03
aahah, yeee, lol, bullshit code
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:04
encode5 is better on my server in all scenario
Andy
@m-2k
Feb 16 2017 12:23

laptop

wf_convert_test:test7(1000000,10).
html_encode=4830 (5180) ms
html_encode3=1820 (1924) ms
html_encode4=2410 (2557) ms
html_encode5=2220 (2327) ms

server

wf_convert_test:test7(1000000,10).
html_encode=7780 (12345) ms
html_encode3=3280 (6466) ms
html_encode4=3720 (5033) ms
html_encode5=3550 (4880) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:25
(100000,1000) on server ?
Andy
@m-2k
Feb 16 2017 12:25
wait
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:26
I'm running OTP 19.1 on debian jessie so pretty up to date
Andy
@m-2k
Feb 16 2017 12:26

laptop

wf_convert_test:test7(100000,1000). 
html_encode=25790 (27248) ms
html_encode3=14600 (16935) ms
html_encode4=12920 (13998) ms
html_encode5=11380 (12550) ms

server

wf_convert_test:test7(100000,1000). 
html_encode=35200 (49497) ms
html_encode3=18960 (26866) ms
html_encode4=16110 (21795) ms
html_encode5=15000 (20521) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:28
OTP version ?
Andy
@m-2k
Feb 16 2017 12:28
Erlang/OTP 19 [erts-8.0] [source] [64-bit] [async-threads:10] [hipe] [kernel-poll:false]
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:29
Good, you see less differences than on my server
but that's ok
encode5 is good I think in a lot of situations, and that's really quicker than the original !
what do u think ?
Namdak Tonpa
@5HT
Feb 16 2017 12:31
I will merge
Andy
@m-2k
Feb 16 2017 12:32
We need to test on real data
Namdak Tonpa
@5HT
Feb 16 2017 12:33
I don't need it to merge :-)
just show me the code
Andy
@m-2k
Feb 16 2017 12:33
that usually pass to html_encode?
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:34
yep I will run it for a little one plus this
%% Escapes Value for embedding inside a double- or single-quoted JavaScript
%% string literal.
%%   undefined -> []
%%   list      -> escaped list (input is flattened with iolist_to_binary/1)
%%   binary    -> escaped binary
%% Backslash, CR, LF and both quote characters are backslash-escaped. The
%% sequences "<script" and "script>" are split with `" + "` so the emitted
%% source never contains a literal script tag.
js_escape3(undefined) -> [];
js_escape3(Value) when is_list(Value) ->
    binary_to_list(js_escape3(iolist_to_binary(Value)));
js_escape3(Value) when is_binary(Value) ->
    list_to_binary(js_escape_aux3(Value)).

%% Body-recursive worker returning an iolist.
%% The script-tag clauses match case-insensitively: `X bor 32` sets the ASCII
%% lowercase bit, so it equals the lowercase letter only for that letter in
%% either case. The original letter case is kept in the output, which leaves
%% the result for all-lowercase input unchanged.
js_escape_aux3(<<"\\", Rest/binary>>) -> ["\\\\" | js_escape_aux3(Rest)];
js_escape_aux3(<<"\r", Rest/binary>>) -> ["\\r"  | js_escape_aux3(Rest)];
js_escape_aux3(<<"\n", Rest/binary>>) -> ["\\n"  | js_escape_aux3(Rest)];
js_escape_aux3(<<"\"", Rest/binary>>) -> ["\\\"" | js_escape_aux3(Rest)];
js_escape_aux3(<<"'",  Rest/binary>>) -> ["\\'"  | js_escape_aux3(Rest)];
js_escape_aux3(<<"<", S, C, R, I, P, T, Rest/binary>>)
  when (S bor 32) =:= $s, (C bor 32) =:= $c, (R bor 32) =:= $r,
       (I bor 32) =:= $i, (P bor 32) =:= $p, (T bor 32) =:= $t ->
    [[$<, S, C, R, "\" + \"", I, P, T] | js_escape_aux3(Rest)];
js_escape_aux3(<<S, C, R, I, P, T, ">", Rest/binary>>)
  when (S bor 32) =:= $s, (C bor 32) =:= $c, (R bor 32) =:= $r,
       (I bor 32) =:= $i, (P bor 32) =:= $p, (T bor 32) =:= $t ->
    [[S, C, R, "\" + \"", I, P, T, $>] | js_escape_aux3(Rest)];
js_escape_aux3(<<Byte:8, Rest/binary>>) -> [Byte | js_escape_aux3(Rest)];
js_escape_aux3(<<>>) -> [].
Andy
@m-2k
Feb 16 2017 12:34
@5HT jse тоже надо пофиксать
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:34
same move
Andy
@m-2k
Feb 16 2017 12:34
хуле
Namdak Tonpa
@5HT
Feb 16 2017 12:34
he said he already run the tests on his real data
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:34
on js_escape
Namdak Tonpa
@5HT
Feb 16 2017 12:34
@seb3s don't you mind to rename js_escape to wf:jse?
and possibly wf:hte
Andy
@m-2k
Feb 16 2017 12:35
да
просто алиасы сделать в wf
для совместимости
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:35
It's ok I've got already define on it as I find it's too long names
Namdak Tonpa
@5HT
Feb 16 2017 12:35
with aliases for the first time
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:35
%% Shorthand macro: ?js_esc(Term) expands to w_convert:js_escape(Term).
-define(js_esc(Term), w_convert:js_escape(Term)).
same with ht_esc
so jse and hte is good to me :-)
Andy
@m-2k
Feb 16 2017 12:36
@seb3s no, wf:js_escape -> wf_convert:jse
Namdak Tonpa
@5HT
Feb 16 2017 12:37
no, I'm talking about top level documentation
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:37
yep plus wf:jse -> wf_convert:jse
Namdak Tonpa
@5HT
Feb 16 2017 12:37
wf:jse
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:37
as I understand it
Andy
@m-2k
Feb 16 2017 12:37
wf:jse of course
Namdak Tonpa
@5HT
Feb 16 2017 12:37
ok guys
just merge it )
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 12:38
lunch time for me, see you soon !
Andy
@m-2k
Feb 16 2017 12:38
@5HT всё же мне не нравятся эти генераторы с аккумулятором
почему [ || <- ] работает медленнее
надо написать Джо, хуле он не оптимизировал
:smile:
@5HT покажи код прежде чем комитить, чтобы потом не патчить
Andy
@m-2k
Feb 16 2017 12:52
 wf_convert_test:test7(10000,1000). 
html_encode=2670 (2516) ms
html_encode3=990 (1161) ms
html_encode4=1120 (1214) ms
html_encode5=930 (1019) ms
hte=800 (881) ms
don't need final list_to_binary
%% HTML-escapes Value in the default `normal` mode.
hte(Value) -> hte(Value, normal).

%% HTML-escapes Value according to the second argument:
%%   a fun   -> the fun is applied to Value and its result returned as is
%%   false   -> Value is returned untouched
%%   true    -> same as `normal`
%%   whites  -> like `normal`, and spaces/tabs also become &nbsp; runs
%% Atoms and numbers are converted with wf:to_binary/1 first. For binaries and
%% lists the result is an iolist (a list mixing replacement binaries and plain
%% bytes), not a flat binary.
hte(Value, Encoder) when is_function(Encoder) -> Encoder(Value);
hte(Value, Mode) when is_atom(Value); is_number(Value) ->
    hte(wf:to_binary(Value), Mode);
hte(Value, false) -> Value;
hte(Value, true) -> hte(Value, normal);
hte(Value, Mode) when is_binary(Value); is_list(Value) ->
    %% iolist_to_binary/1 leaves a binary argument as it is.
    hte_aux(iolist_to_binary(Value), Mode).

%% Body-recursive worker: emits one list element per input byte, either the
%% byte itself or the binary that replaces it.
hte_aux(<<>>, _Mode) ->
    [];
hte_aux(<<Byte, Tail/binary>>, Mode) ->
    Piece = case Byte of
        $\s when Mode =:= whites -> <<"&nbsp;">>;
        $\t when Mode =:= whites -> <<"&nbsp; &nbsp; &nbsp;">>;
        $\n -> <<"<br>">>;
        $\\ -> <<"&#92;">>;
        $<  -> <<"&lt;">>;
        $>  -> <<"&gt;">>;
        $"  -> <<"&quot;">>;
        $'  -> <<"&#39;">>;
        $&  -> <<"&amp;">>;
        _   -> Byte
    end,
    [Piece | hte_aux(Tail, Mode)].
jse billshit:
js_escape_aux3(<<"<script",    Rest/binary>>) -> ["<scr\" + \"ipt"    | js_escape_aux3(Rest)];
wf:jse("<Script>alert();</Script>").
"<Script>alert();</Script>"
Andy
@m-2k
Feb 16 2017 13:04
@5HT @seb3s any idea for 'script' escaping?
interesting, re:run is very slower or not (with compiled pattern)
Andy
@m-2k
Feb 16 2017 13:26
OMG!!!
html_encode=2840 (2509) ms
html_encode3=980 (1101) ms
html_encode4=1090 (1152) ms
html_encode5=920 (979) ms
hte=800 (851) ms
hte2=210 (216) ms
Andy
@m-2k
Feb 16 2017 13:36
list_to_binary(wf_convert_test:hte2(<<"<div class='xl'>блэйзинг\nфаст</div>"/utf8>>)).
<<"&lt;div class=&#39;xl&#39;&gt;блэйзинг<br>фаст&lt;/div&gt;"/utf8>>
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 13:59
two things
hte doesn't return a binary but an iolist mixing binaries and integers
hte_aux(<<C:8,  T/binary>>,E)      -> [C            | hte_aux(T,E)];
Andy
@m-2k
Feb 16 2017 14:00

returns a binary

no needed

Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:01
I thought it was the specs
in this case why are your mixing integer and binaries ? and not keeping only integers ?
Andy
@m-2k
Feb 16 2017 14:02
faster
mixed binary list works faster for list_to_binary instead list of lists
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:04
ok
do you have hte2 ?
Andy
@m-2k
Feb 16 2017 14:09
fail, for all 7 conditions it works slowest
wf_convert_test:test7(10000,1000).                    
hte=810 (892) ms
hte2=4610 (4789) ms
%% Regex-based variant of hte/1: escapes Value in the default `normal` mode.
hte2(Value) -> hte2(Value, normal).

%% Dispatches on the second argument and the type of Value:
%%   a fun -> the fun is applied to Value and its result returned as is
%%   false -> Value is returned untouched
%%   true  -> same as `normal`
%% Atoms and numbers are converted with wf:to_binary/1 first; binaries and
%% lists are handed to hte2_aux/2 as a binary.
hte2(Value, Encoder) when is_function(Encoder) -> Encoder(Value);
hte2(Value, Mode) when is_atom(Value); is_number(Value) ->
    hte2(wf:to_binary(Value), Mode);
hte2(Value, false) -> Value;
hte2(Value, true) -> hte2(Value, normal);
hte2(Value, Mode) when is_binary(Value); is_list(Value) ->
    %% iolist_to_binary/1 leaves a binary argument as it is.
    hte2_aux(iolist_to_binary(Value), Mode).

%% Returns the compiled regular expression matching any single character that
%% must be escaped: newline, backslash, '<', '>', '"', ''' or '&', captured as
%% group 1.
%% The pattern is compiled from source rather than hard-coding the internal
%% re_pattern tuple: that representation is private to the re module and is
%% not guaranteed to stay valid across OTP/PCRE versions.
%% NOTE(review): this compiles on every call; cache the result in the caller
%% if it shows up in profiles.
hte2_re() ->
    {ok, Re} = re:compile(<<"(\\n|\\\\|\\<|\\>|\\\"|\\'|\\&)">>),
    Re.
%% Escapes binary B by splitting it on the pattern from hte2_re/0 and
%% replacing each captured delimiter with its HTML entity.
%% With the `group` option re:split/3 returns one list per piece: the text
%% before the delimiter followed by the captured delimiter, or the text alone
%% for the final piece. Returns an iolist.
%% The second argument (encoding mode) is accepted for parity with hte_aux/2
%% but is not used here.
%% Per the original note, the pattern corresponds to
%% re:compile(<<"(\\n|\\\\|\\<|\\>|\\\"|\\'|\\&)">>).
hte2_aux(B, _Encode) ->
    Match = re:split(B, hte2_re(), [group]),
    [case W of
         []         -> Z;
         [<<"\n">>] -> [Z, <<"<br>">>];
         [<<"\\">>] -> [Z, <<"&#92;">>];
         [<<"<">>]  -> [Z, <<"&lt;">>];
         [<<">">>]  -> [Z, <<"&gt;">>];
         [<<"\"">>] -> [Z, <<"&quot;">>];
         [<<"'">>]  -> [Z, <<"&#39;">>];
         [<<"&">>]  -> [Z, <<"&amp;">>]
     end || [Z | W] <- Match].
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:12
ouch !
Andy
@m-2k
Feb 16 2017 14:24
for 1 condition (\n) works faster (vs. tail recursion):
wf_convert_test:test7(10000,1000).
hte=990 (819) ms
hte2=220 (228) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:29
It will probably degrades a lot if the number of escaped chars increase
Andy
@m-2k
Feb 16 2017 14:30
linear dependency with | condition
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:31
the number of matches and split will increase
Andy
@m-2k
Feb 16 2017 14:31
pattern matching win
@seb3s any idea for optimize regexp?
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:33
no
Andy
@m-2k
Feb 16 2017 14:34
we have to write a parser on C (NIF)! :smile:
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:34
:-)
Andy
@m-2k
Feb 16 2017 14:45
optimized, but already slowest: <<"([\\n\\\\\\<\\>\\\"\\'\\&])">>
wf_convert_test:test7(100000,1000).
html_encode=29190 (29698) ms
html_encode5=11080 (12460) ms
hte=7670 (8433) ms
hte2=26010 (27327) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:46
regex seems really slower on my server in all cases right now
w_test:test6(10000,1000).
hte=420 (421) ms
hte2=2080 (2081) ms
Andy
@m-2k
Feb 16 2017 14:47
true
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:48
hte is good now !
pure erlang and quick enough :-)
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 14:57
aha interesting...
I don't need that much right now i think ...
Andy
@m-2k
Feb 16 2017 14:58
I think to stop there
wf_convert_test:test7(10000,1000).
html_encode=2320 (2420) ms
html_encode5=900 (963) ms
hte=850 (1105) ms
hte2=2750 (3010) ms
xmerl_lib_export_text=870 (1072) ms
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 15:02
xmerl_lib escapes fewer chars
Andy
@m-2k
Feb 16 2017 15:03
yes I know, just for test native erlang lib )
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 15:06
I think I like hte as it is right now :-) can be merged for me
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 15:40
andy
here is my jse version that corrects the "ScRipT" pb
%% Escapes Value for embedding inside a quoted JavaScript string literal.
%% Returns [] for `undefined`; for binaries and lists returns an iolist mixing
%% replacement binaries and untouched bytes.
jse(undefined) -> [];
jse(Value) when is_binary(Value); is_list(Value) ->
    %% iolist_to_binary/1 leaves a binary argument as it is.
    jse_aux(iolist_to_binary(Value)).

%% Body-recursive worker. Backslash, CR, LF and both quote characters are
%% backslash-escaped. "<script" and "script>" in any letter case are replaced
%% by a lowercase form split with `" + "`.
%% `X bor 32` sets the ASCII lowercase bit, so it equals the lowercase letter
%% only when X is that letter in upper or lower case.
jse_aux(<<>>) -> [];
jse_aux(<<$\\, Tail/binary>>) -> [<<"\\\\">> | jse_aux(Tail)];
jse_aux(<<$\r, Tail/binary>>) -> [<<"\\r">>  | jse_aux(Tail)];
jse_aux(<<$\n, Tail/binary>>) -> [<<"\\n">>  | jse_aux(Tail)];
jse_aux(<<$",  Tail/binary>>) -> [<<"\\\"">> | jse_aux(Tail)];
jse_aux(<<$',  Tail/binary>>) -> [<<"\\'">>  | jse_aux(Tail)];
jse_aux(<<$<, S, C, R, I, P, T, Tail/binary>>)
  when (S bor 32) =:= $s, (C bor 32) =:= $c, (R bor 32) =:= $r,
       (I bor 32) =:= $i, (P bor 32) =:= $p, (T bor 32) =:= $t ->
    [<<"<scr\" + \"ipt">> | jse_aux(Tail)];
jse_aux(<<S, C, R, I, P, T, $>, Tail/binary>>)
  when (S bor 32) =:= $s, (C bor 32) =:= $c, (R bor 32) =:= $r,
       (I bor 32) =:= $i, (P bor 32) =:= $p, (T bor 32) =:= $t ->
    [<<"scr\" + \"ipt>">> | jse_aux(Tail)];
jse_aux(<<Byte, Tail/binary>>) -> [Byte | jse_aux(Tail)].
this is still faster than the existing one, and good enough in my opinion
Andy
@m-2k
Feb 16 2017 17:20
@seb3s next level: < script >
Sébastien Saint-Sevin
@seb3s
Feb 16 2017 17:46
I don't think it can be done with guards or with binary pattern matching
Matti Katila
@mudyc
Feb 16 2017 19:23
how to catch up the first event coming from js side?
Matti Katila
@mudyc
Feb 16 2017 20:17
event(init) it seems