The files you cited all contain asm code fragments (inline assembler), which C/C++ software uses in its own code (as apangin, the JVM expert, pointed out, mostly in GC code). And there actually is a difference: the Linux, Solaris and BSD variants of x86_64 HotSpot have prefetches implemented in HotSpot itself, while Windows has them disabled/unimplemented, which is partly strange and partly unexplainable. It may also make the JVM a bit slower on Windows (by a few percent; more on platforms without hardware prefetch), but that still will not help Sun/Oracle sell more Solaris licenses or paid Solaris support contracts. Ross also guessed that the inline asm syntax may not be supported by the MS C++ compiler, but _mm_prefetch
should be (who will open a JDK bug to add it to the file?).
The HotSpot JVM is a JIT, and the JITted code is emitted (generated) by the JIT as bytes (while it is possible for a JIT to copy code from its own functions into the generated code, or to emit a call to support functions, in HotSpot prefetches are emitted as bytes). How can we find out how they are emitted? A simple way is to find an online searchable copy of jdk8u (or, better, a cross-reference such as metager), for example on GitHub: https://github.com/JetBrains/jdk8u_hotspot, and search for prefetch, prefetch emit, prefetchr or lir_prefetchr. There are some relevant results:
The actual bytes emitted for the JVM's C1 compiler / LIR are in jdk8u_hotspot/src/cpu/x86/vm/assembler_x86.cpp
:
// Emits the lead-in shared by all prefetch encodings in this file: whatever
// prefix(src) produces for the address operand, followed by the byte 0x0F.
// Callers are expected to append the second opcode byte and the operand.
void Assembler::prefetch_prefix(Address src) {
  const int leading_opcode_byte = 0x0F;
  prefix(src);
  emit_int8(leading_opcode_byte);
}
// Emits a PREFETCHNTA for the memory operand `src`: the common prefetch
// lead-in, opcode byte 0x18, then the operand with digit 0 in the reg slot.
void Assembler::prefetchnta(Address src) {
  // The SSE capability check is wrapped in NOT_LP64, i.e. non-LP64 builds only.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  const int opcode_byte = 0x18;
  InstructionMark mark(this);
  prefetch_prefix(src);
  emit_int8(opcode_byte);
  emit_operand(rax, src);  // rax is passed only for its encoding: 0, src
}
// Emits the 3DNow!-style read prefetch for `src`: the common prefetch
// lead-in, opcode byte 0x0D, then the operand with digit 0 in the reg slot.
// Asserts that VM_Version reports 3DNow! prefetch support.
void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  const int opcode_byte = 0x0D;
  InstructionMark mark(this);
  prefetch_prefix(src);
  emit_int8(opcode_byte);
  emit_operand(rax, src);  // rax is passed only for its encoding: 0, src
}
// Emits a PREFETCHT0 for the memory operand `src`: the common prefetch
// lead-in, opcode byte 0x18, then the operand with digit 1 in the reg slot.
void Assembler::prefetcht0(Address src) {
  // The SSE capability check is wrapped in NOT_LP64, i.e. non-LP64 builds only.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  const int opcode_byte = 0x18;
  InstructionMark mark(this);
  prefetch_prefix(src);
  emit_int8(opcode_byte);
  emit_operand(rcx, src);  // rcx is passed only for its encoding: 1, src
}
// Emits a PREFETCHT1 for the memory operand `src`: the common prefetch
// lead-in, opcode byte 0x18, then the operand with digit 2 in the reg slot.
void Assembler::prefetcht1(Address src) {
  // The SSE capability check is wrapped in NOT_LP64, i.e. non-LP64 builds only.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  const int opcode_byte = 0x18;
  InstructionMark mark(this);
  prefetch_prefix(src);
  emit_int8(opcode_byte);
  emit_operand(rdx, src);  // rdx is passed only for its encoding: 2, src
}
// Emits a PREFETCHT2 for the memory operand `src`: the common prefetch
// lead-in, opcode byte 0x18, then the operand with digit 3 in the reg slot.
void Assembler::prefetcht2(Address src) {
  // The SSE capability check is wrapped in NOT_LP64, i.e. non-LP64 builds only.
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  const int opcode_byte = 0x18;
  InstructionMark mark(this);
  prefetch_prefix(src);
  emit_int8(opcode_byte);
  emit_operand(rbx, src);  // rbx is passed only for its encoding: 3, src
}
// Emits the 3DNow!-style write prefetch (PREFETCHW) for `src`: the common
// prefetch lead-in, opcode byte 0x0D, then the operand with digit 1 in the
// reg slot. Asserts that VM_Version reports 3DNow! prefetch support.
void Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  const int opcode_byte = 0x0D;
  InstructionMark mark(this);
  prefetch_prefix(src);
  emit_int8(opcode_byte);
  emit_operand(rcx, src);  // rcx is passed only for its encoding: 1, src
}
Usage in c1 LIR: src/share/vm/c1/c1_LIRAssembler.cpp
void LIR_Assembler::emit_op1(LIR_Op1* op) {
switch (op->code()) {
...
case lir_prefetchr:
prefetchr(op->in_opr());
break;
case lir_prefetchw:
prefetchw(op->in_opr());
break;
Now we know the opcode lir_prefetchr
and can search for it and for lir_prefetchw (here or in the OpenGrok xref) to find the only example, in src/share/vm/c1/c1_LIR.cpp
// Appends a single-operand prefetch op for `addr` to this LIR list.
// `is_store` selects the opcode: lir_prefetchw when true, lir_prefetchr
// otherwise. The address is wrapped via LIR_OprFact::address().
void LIR_List::prefetch(LIR_Address* addr, bool is_store) {
  if (is_store) {
    append(new LIR_Op1(lir_prefetchw, LIR_OprFact::address(addr)));
  } else {
    append(new LIR_Op1(lir_prefetchr, LIR_OprFact::address(addr)));
  }
}
There is another place where prefetch instructions are defined (for C2, as noted by apangin), src/cpu/x86/vm/x86_64.ad
:
// Prefetch instructions. ...
// Read-prefetch rule used when ReadPrefetchInstr == 3: matches a PrefetchRead
// of a memory operand and encodes it by calling the assembler's prefetchr()
// on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchr( memory mem ) %{
predicate(ReadPrefetchInstr==3);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
ins_encode %{
__ prefetchr($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Read-prefetch rule used when ReadPrefetchInstr == 0: matches a PrefetchRead
// of a memory operand and encodes it by calling the assembler's prefetchnta()
// on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchrNTA( memory mem ) %{
predicate(ReadPrefetchInstr==0);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Read-prefetch rule used when ReadPrefetchInstr == 1: matches a PrefetchRead
// of a memory operand and encodes it by calling the assembler's prefetcht0()
// on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchrT0( memory mem ) %{
predicate(ReadPrefetchInstr==1);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Read-prefetch rule used when ReadPrefetchInstr == 2: matches a PrefetchRead
// of a memory operand and encodes it by calling the assembler's prefetcht2()
// on the operand's address. Cost 125, pipeline class ialu_mem.
instruct prefetchrT2( memory mem ) %{
predicate(ReadPrefetchInstr==2);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Write-prefetch rule: matches a PrefetchWrite of a memory operand and encodes
// it by calling the assembler's prefetchnta() on the operand's address.
// Unlike the read rules it carries no predicate, so it is not gated on a
// prefetch-instruction flag here. Cost 125, pipeline class ialu_mem.
instruct prefetchwNTA( memory mem ) %{
match(PrefetchWrite mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Prefetch instructions for allocation.
// Allocation-prefetch rule used when AllocatePrefetchInstr == 3: matches a
// PrefetchAllocation of a memory operand and encodes it by calling the
// assembler's prefetchw() on the operand's address. Cost 125, pipeline class
// ialu_mem.
instruct prefetchAlloc( memory mem ) %{
predicate(AllocatePrefetchInstr==3);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
ins_encode %{
__ prefetchw($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Allocation-prefetch rule used when AllocatePrefetchInstr == 0: matches a
// PrefetchAllocation of a memory operand and encodes it by calling the
// assembler's prefetchnta() on the operand's address. Cost 125, pipeline class
// ialu_mem.
instruct prefetchAllocNTA( memory mem ) %{
predicate(AllocatePrefetchInstr==0);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Allocation-prefetch rule used when AllocatePrefetchInstr == 1: matches a
// PrefetchAllocation of a memory operand and encodes it by calling the
// assembler's prefetcht0() on the operand's address. Cost 125, pipeline class
// ialu_mem.
instruct prefetchAllocT0( memory mem ) %{
predicate(AllocatePrefetchInstr==1);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Allocation-prefetch rule used when AllocatePrefetchInstr == 2: matches a
// PrefetchAllocation of a memory operand and encodes it by calling the
// assembler's prefetcht2() on the operand's address. Cost 125, pipeline class
// ialu_mem.
instruct prefetchAllocT2( memory mem ) %{
predicate(AllocatePrefetchInstr==2);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
LIR_Assembler::prefetchr
/LIR_Assembler::prefetchw
– Pave