Dumitru napísal:Kód: Vybrať všetko
uint16_t temp;
uint8_t data[2];
// conventional form: data[1] ends up as the high byte, data[0] as the low byte
temp = data[1];
temp = temp<<8;
temp |= data[0];
// SUPPOSEDLY the same thing, written differently: each byte is stored
// straight into temp's memory (offset 0 gets data[1], offset 1 gets data[0]).
// Which of the two offsets holds the high byte depends on the target's
// byte order, so this matches the form above only on a big-endian target.
*((char *)&temp) = data[1];
*((char *)&temp+1) = data[0];
Čaute, môže mi niekto vysvetliť druhý zápis a či je naozaj totožný a rýchlejší?
*(data) je to iste ako data[0]
*(data+1) je to iste ako data[1]
tomuto rozumiem, ale tam sa jedná o pole, teda viac premenných rovnakého typu
tu ale mám jednu 16-bitovú premennú, ktorú akoby adresovo rozbili na dve 8-bitové
Vďaka
@Dumitru, v podobnych situaciach je najlepsie to nakodit a pozret disassembly ako je to prelozene na instrukcie:
Kód: Vybrať všetko
/* Merged 16-bit result; volatile so each access in the functions below is a real memory access. */
volatile uint16_t temp;
/* Input bytes: data[0] is the low byte and data[1] the high byte of the merged value. */
volatile uint8_t data[2] = {0xad, 0xde};

/*
 * Merge the two bytes arithmetically: temp = (data[1] << 8) | data[0].
 * The result does not depend on the byte order of the target.
 */
void merge_8bit_1(){
    temp = data[1];
    temp = temp<<8;
    temp |= data[0];
}

/*
 * Merge the two bytes by storing each one directly into temp's storage.
 *
 * Assumes a little-endian target (offset 0 of temp is its low byte), so
 * data[0] is written to offset 0 and data[1] to offset 1; this produces the
 * same value as merge_8bit_1(). The earlier variant stored data[1] at
 * offset 0, which gives the byte-swapped value on little-endian targets;
 * disassembly taken from that variant shows the indices exchanged.
 * On a big-endian target the two offsets would have to be swapped.
 *
 * The stores go through a volatile uint8_t pointer: a plain char * cast
 * discards the volatile qualifier of temp and passes the byte through a
 * possibly signed char.
 */
void merge_8bit_2(){
    volatile uint8_t *bytes = (volatile uint8_t *)&temp;

    bytes[0] = data[0];
    bytes[1] = data[1];
}
je to kompilované bez optimalizácie pre Cortex-M7 (ARMv7E-M)
rozdiel je ze v prvom pripade je to robene ako matematicka operacie, nacitaj register, uprav na format, uloz, zopakuj pre druhe cislo, pak nacitaj, urob OR, uloz.
v druhom pripade je to podobne, ale odpada cele zvlast ukladanie upravenych premennych, znovu nacitanie, prevedenie OR... proste ich nacita, prevede na format a ulozi na iny offset v pamati.
Kód: Vybrať všetko
void merge_8bit_1(){
merge_8bit_1:
push {r7}
add r7, sp, #0
temp = data[1];
ldr r3, [pc, #56] ; (0x20003ee0 <merge_8bit_1+64>)
ldrb r3, [r3, #1]
uxtb r3, r3
uxth r2, r3
ldr r3, [pc, #52] ; (0x20003ee4 <merge_8bit_1+68>)
strh r2, [r3, #0]
temp = temp<<8;
ldr r3, [pc, #48] ; (0x20003ee4 <merge_8bit_1+68>)
ldrh r3, [r3, #0]
uxth r3, r3
lsls r3, r3, #8
uxth r2, r3
ldr r3, [pc, #40] ; (0x20003ee4 <merge_8bit_1+68>)
strh r2, [r3, #0]
temp |= data[0];
ldr r3, [pc, #32] ; (0x20003ee0 <merge_8bit_1+64>)
ldrb r3, [r3, #0]
uxtb r3, r3
uxth r2, r3
ldr r3, [pc, #28] ; (0x20003ee4 <merge_8bit_1+68>)
ldrh r3, [r3, #0]
uxth r3, r3
orrs r3, r2
uxth r2, r3
ldr r3, [pc, #16] ; (0x20003ee4 <merge_8bit_1+68>)
strh r2, [r3, #0]
}
Kód: Vybrať všetko
void merge_8bit_2(){
merge_8bit_2:
push {r7}
add r7, sp, #0
*((char *)&temp) = data[1];
ldr r3, [pc, #32] ; (0x20003f10 <merge_8bit_2+40>)
ldrb r3, [r3, #1]
uxtb r2, r3
ldr r3, [pc, #32] ; (0x20003f14 <merge_8bit_2+44>)
sxtb r2, r2
strb r2, [r3, #0]
*((char *)&temp+1) = data[0];
ldr r3, [pc, #20] ; (0x20003f10 <merge_8bit_2+40>)
ldrb r3, [r3, #0]
uxtb r2, r3
ldr r3, [pc, #24] ; (0x20003f18 <merge_8bit_2+48>)
sxtb r2, r2
strb r2, [r3, #0]
}
ak zapneš optimalizáciu (gcc -O1), situácia je podstatne lepšia, ale stále v prospech druhého zápisu kvôli ORovaniu v prvom:
Kód: Vybrať všetko
temp = data[1];
merge_8bit_1:
ldr r1, [pc, #24] ; (0x200027c8 <merge_8bit_1+28>)
ldrb r2, [r1, #1]
ldr r3, [pc, #24] ; (0x200027cc <merge_8bit_1+32>)
strh r2, [r3, #0]
temp = temp<<8;
ldrh r2, [r3, #0]
lsls r2, r2, #8
uxth r2, r2
strh r2, [r3, #0]
temp |= data[0];
ldrb r2, [r1, #0]
ldrh r1, [r3, #0]
orrs r2, r1
strh r2, [r3, #0]
}
Kód: Vybrať všetko
*((char *)&temp) = data[1];
merge_8bit_2:
ldr r2, [pc, #12] ; (0x200027e0 <merge_8bit_2+16>)
ldrb r1, [r2, #1]
ldr r3, [pc, #12] ; (0x200027e4 <merge_8bit_2+20>)
strb r1, [r3, #0]
*((char *)&temp+1) = data[0];
ldrb r2, [r2, #0]
strb r2, [r3, #1]
}
situácia je veľmi podobná aj na x86 (gcc -O0):
Kód: Vybrať všetko
merge_8bit_1:
movzbl 0x295b(%rip),%eax # 0x404089 <data+1>
movzbl %al,%edx
lea 0x7238(%rip),%rax # 0x408970 <temp>
mov %dx,(%rax)
temp = temp<<8;
lea 0x722e(%rip),%rax # 0x408970 <temp>
movzwl (%rax),%eax
shl $0x8,%eax
mov %eax,%edx
lea 0x721f(%rip),%rax # 0x408970 <temp>
mov %dx,(%rax)
temp |= data[0];
movzbl 0x292d(%rip),%eax # 0x404088 <data>
movzbl %al,%edx
lea 0x720b(%rip),%rax # 0x408970 <temp>
movzwl (%rax),%eax
or %eax,%edx
lea 0x71ff(%rip),%rax # 0x408970 <temp>
mov %dx,(%rax)
}
Kód: Vybrať všetko
*((char *)&temp) = data[1];
merge_8bit_2:
movzbl 0x290c(%rip),%edx # 0x404089 <data+1>
lea 0x71ec(%rip),%rax # 0x408970 <temp>
mov %rax,%rcx
mov %edx,%eax
mov %al,(%rcx)
*((char *)&temp+1) = data[0];
movzbl 0x28f6(%rip),%edx # 0x404088 <data>
lea 0x71d7(%rip),%rax # 0x408970 <temp>
lea 0x1(%rax),%rax
mov %dl,(%rax)
}
so zapnutou optimalizaciou -O1
Kód: Vybrať všetko
temp = data[1];
merge_8bit_1:
movzbl 0x199f(%rip),%eax # 0x403061 <data+1>
movzbl %al,%eax
mov %ax,0x62a4(%rip) # 0x407970 <temp>
temp = temp<<8;
movzwl 0x629d(%rip),%eax # 0x407970 <temp>
shl $0x8,%eax
mov %ax,0x6293(%rip) # 0x407970 <temp>
temp |= data[0];
movzbl 0x197c(%rip),%eax # 0x403060 <data>
movzwl 0x6285(%rip),%edx # 0x407970 <temp>
movzbl %al,%eax
or %edx,%eax
mov %ax,0x6279(%rip) # 0x407970 <temp>
Kód: Vybrať všetko
*((char *)&temp) = data[1];
merge_8bit_2:
movzbl 0x1962(%rip),%eax # 0x403061 <data+1>
mov %al,0x626b(%rip) # 0x407970 <temp>
*((char *)&temp+1) = data[0];
movzbl 0x1954(%rip),%eax # 0x403060 <data>
mov %al,0x625f(%rip) # 0x407971 <temp+1>
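aj keď zvolíš vyššie optimalizácie, situácia je stále rovnaká: v druhom prípade ušetríš OR medzi dvoma registrami. Pozor však: z disassembly vidno, že data[1] sa ukladá na adresu temp a data[0] na temp+1, takže na little-endian architektúrach (Cortex-M aj x86) vyjde 0xADDE, kým prvý zápis dáva 0xDEAD. Totožný výsledok dostaneš až po prehodení indexov (alebo na big-endian architektúre).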
Kreativita a motivacia je to, co prinasa originalne napady a myslienky, disciplina je to, co ich dokaze zrealizovat.