I call shenanigans. The "fast" version executes considerably more instructions and even performs more reads from memory (from the stack, so they hit the L1 cache, but that is still slower than registers).
00007FFAC53D3D01 movups xmmword ptr [rsp+8],xmm0
00007FFAC53D3D06 sub rsp,48h
00007FFAC53D3D0A mov qword ptr [rsp+20h],0
00007FFAC53D3D13 mov qword ptr [rsp+28h],0
00007FFAC53D3D1C mov qword ptr [rsp+30h],0
00007FFAC53D3D25 mov rax,7FFAC5423D40h
00007FFAC53D3D2F mov eax,dword ptr [rax]
00007FFAC53D3D31 test eax,eax
00007FFAC53D3D33 je 00007FFAC53D3D3A
00007FFAC53D3D35 call 00007FFB24EE39F0
00007FFAC53D3D3A mov r8d,8
00007FFAC53D3D40 xor edx,edx
00007FFAC53D3D42 lea rcx,[rsp+20h]
00007FFAC53D3D47 call 00007FFB24A21680
t.DoubleValue = value;
00007FFAC53D3D4C movsd xmm5,mmword ptr [rsp+50h]
00007FFAC53D3D52 movsd mmword ptr [rsp+20h],xmm5
UInt64 exp = t.UintValue & 0xfff0000000000000;
00007FFAC53D3D58 mov rax,qword ptr [rsp+20h]
00007FFAC53D3D5D mov rcx,0FFF0000000000000h
00007FFAC53D3D67 and rax,rcx
00007FFAC53D3D6A mov qword ptr [rsp+28h],rax
UInt64 man = t.UintValue & 0x000fffffffffffff;
00007FFAC53D3D6F mov rax,qword ptr [rsp+20h]
00007FFAC53D3D74 mov rcx,0FFFFFFFFFFFFFh
00007FFAC53D3D7E and rax,rcx
00007FFAC53D3D81 mov qword ptr [rsp+30h],rax
return (exp == 0x7ff0000000000000 || exp == 0xfff0000000000000) && (man != 0);
00007FFAC53D3D86 mov rax,7FF0000000000000h
00007FFAC53D3D90 cmp qword ptr [rsp+28h],rax
00007FFAC53D3D95 je 00007FFAC53D3DA8
00007FFAC53D3D97 mov rax,0FFF0000000000000h
00007FFAC53D3DA1 cmp qword ptr [rsp+28h],rax
00007FFAC53D3DA6 jne 00007FFAC53D3DBD
00007FFAC53D3DA8 xor eax,eax
00007FFAC53D3DAA cmp qword ptr [rsp+30h],0
00007FFAC53D3DB0 setne al
00007FFAC53D3DB3 mov dword ptr [rsp+38h],eax
00007FFAC53D3DB7 mov al,byte ptr [rsp+38h]
00007FFAC53D3DBB jmp 00007FFAC53D3DC1
00007FFAC53D3DBD xor eax,eax
00007FFAC53D3DBF jmp 00007FFAC53D3DC1
00007FFAC53D3DC1 nop
00007FFAC53D3DC2 add rsp,48h
00007FFAC53D3DC6 ret
Versus the .NET version:
return (*(UInt64*)(&d) & 0x7FFFFFFFFFFFFFFFL) > 0x7FF0000000000000L;
00007FFAC53D3DE0 movsd mmword ptr [rsp+8],xmm0
00007FFAC53D3DE6 sub rsp,38h
00007FFAC53D3DEA mov rax,7FFAC5423D40h
00007FFAC53D3DF4 mov eax,dword ptr [rax]
00007FFAC53D3DF6 test eax,eax
00007FFAC53D3DF8 je 00007FFAC53D3DFF
00007FFAC53D3DFA call 00007FFB24EE39F0
00007FFAC53D3DFF mov rdx,qword ptr [rsp+40h]
00007FFAC53D3E04 mov rax,7FFFFFFFFFFFFFFFh
00007FFAC53D3E0E and rdx,rax
00007FFAC53D3E11 xor ecx,ecx
00007FFAC53D3E13 mov rax,7FF0000000000000h
00007FFAC53D3E1D cmp rdx,rax
00007FFAC53D3E20 seta cl
00007FFAC53D3E23 mov dword ptr [rsp+20h],ecx
00007FFAC53D3E27 movzx eax,byte ptr [rsp+20h]
00007FFAC53D3E2C jmp 00007FFAC53D3E2E
00007FFAC53D3E2E nop
00007FFAC53D3E2F add rsp,38h
00007FFAC53D3E33 ret
internal
so you would end up copying that source code and having to maintain it for future use. – Kashmir