/*personal notes of renzo diomedi*/

~ 00001000 ~



# x87 constant-load demo (GNU as, AT&T syntax).
# Pushes the seven built-in FPU constants one after another and then exits
# through int $0x80 with eax = 1 and ebx = 0 (the i386 Linux exit call,
# status 0). Because +1.0 is pushed first and +0.0 last, the register stack
# ends up with ST(0) = +0.0 and ST(6) = +1.0.

        .equ    SYS_EXIT, 1             # request number loaded into %eax
        .equ    EXIT_OK, 0              # exit status loaded into %ebx

        .section .text
        .globl  _start
_start:
        nop                             # presumably a convenient first stop for the debugger
        fld1                            # push +1.0
        fldl2t                          # push log2(10)
        fldl2e                          # push log2(e)
        fldpi                           # push pi
        fldlg2                          # push log10(2)
        fldln2                          # push ln(2)
        fldz                            # push +0.0

        movl    $SYS_EXIT, %eax
        movl    $EXIT_OK, %ebx
        int     $0x80


(gdb) info all

st0 0 (raw 0x00000000000000000000)
st1 0.6931471805599453094286904741849753 (raw 0x3ffeb17217f7d1cf79ac)
st2 0.30102999566398119522564642835948945 (raw 0x3ffd9a209a84fbcff799)
st3 3.1415926535897932385128089594061862 (raw 0x4000c90fdaa22168c235)
st4 1.4426950408889634073876517827983434 (raw 0x3fffb8aa3b295c17f0bc)
st5 3.3219280948873623478083405569094566 (raw 0x4000d49a784bcd1b8afe)
st6 1 (raw 0x3fff8000000000000000)
st7 0 (raw 0x00000000000000000000)


The SSE technology incorporates eight 128-bit XMM registers that can be used to hold packed floating-point numbers.

Floating-point calculations can be performed in parallel using the multiple data elements, producing results quicker than sequentially processing the data.



The following two new 128-bit floating-point data types are available:

❑ 128-bit packed single-precision floating-point (in SSE)

❑ 128-bit packed double-precision floating-point (in SSE2)

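Because a single-precision floating-point value requires 32 bits, the 128-bit register can hold four packed single-precision floating-point values. Likewise, a double-precision floating-point value requires 64 bits, so the register can hold two packed double-precision floating-point values.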

These new data types are not available in the FPU or MMX registers.
They can only be used in the XMM registers and only on processors that support SSE or SSE2.
Special instructions must be used to load and retrieve the data values, and special math instructions must be used to perform mathematical operations on the packed floating-point data.

SSE floating-point values
There is a complete set of instructions for moving 128-bit packed single-precision floating-point values between memory and the XMM registers on the processor.



Each of these instructions uses the 128-bit XMM registers to move packed 32-bit single-precision floating-point values between the XMM registers and memory. Not only can you move entire groups of packed single-precision floating-point values, you can also move a subset of two packed single-precision floating-point values between XMM registers.

# ssefloat
# developed in mingw-w64 environment
#
# Loads two groups of four packed single-precision floats into XMM registers
# with movups, copies one register into another, and stores one group to
# memory. No entry symbol is defined, so execution starts at the first
# instruction of .text.
# NOTE: the gdb session recorded further down in these notes was captured
# with the original ending (movl $1, %eax / movl $0, %ebx / int $0x21).

        .section .data
value1:
        .float  12.34, 2345.543, -3493.2, 0.44901
value2:
        .float  -5439.234, 32121.4, 1.0094, 0.000003

        .section .bss
        .lcomm  data, 16                # 16 bytes: room for four 32-bit floats

        .section .text
        nop
        movups  value1, %xmm0           # xmm0 = the four floats at value1
        movups  value2, %xmm1           # xmm1 = the four floats at value2
        movups  %xmm0, %xmm2            # xmm2 = copy of xmm0
        movups  %xmm0, data             # store xmm0 to data (movups needs no 16-byte alignment)

        # Exit. int $0x21 is a DOS service interrupt and faults inside a
        # Windows process, and eax = 1 / ebx = 0 is the Linux int $0x80
        # convention, so neither applies to a mingw-w64 executable. Return
        # from the entry point instead: nothing has been pushed, so ret goes
        # back to the Windows thread-start code with EAX = 0 as exit status.
        xorl    %eax, %eax
        ret




ssefloat.s

ssefloat.exe




C:\>as -gstabs -o users\rnz\desktop\ssefloat.o users\rnz\desktop\ssefloat.s
C:\>ld -o users\rnz\desktop\ssefloat.exe users\rnz\desktop\ssefloat.o
C:\>gdb -q users\rnz\desktop\ssefloat.exe
Reading symbols from users\rnz\desktop\ssefloat.exe...done.
(gdb) break 1
Breakpoint 1 at 0x401000: file users\rnz\desktop\ssefloat.s, line 1.
(gdb) run
Starting program: C:\users\rnz\desktop\ssefloat.exe
[New Thread 13684.0x42b4]
Breakpoint 1, ?? () at users\rnz\desktop\ssefloat.s:10
10 nop
(gdb) s
11 movups value1, %xmm0
(gdb) s
12 movups value2, %xmm1
(gdb) s
13 movups %xmm0, %xmm2
(gdb) s
14 movups %xmm0, data

(gdb) print $xmm0
$1 = {v4_float = {12.3400002, 2345.54297, -3493.19995, 0.449010015}, v2_double = {5.6204289471764299e+24, 1.0439462282443856e-05}, v16_int8 = {-92, 112, 69, 65, -80, -104, 18, 69, 51, 83, 90, -59, -92, -28, -27, 62}, v8_int16 = {28836, 16709, -26448, 17682, 21299, -15014, -7004, 16101}, v4_int32 = {1095069860, 1158846640, -983936205, 1055253668}, v2_int64 = {4977208420974555300, 4532279996355072819}, uint128 = 83605809163155287727927076236493680804}

(gdb) print $xmm1
$2 = {v4_float = {-5439.23389, 32121.4004, 1.00940001, 3.00000011e-06}, v2_double = {8.7452727745837517e+33, 3.4658329842889617e-47}, v16_int8 = {-33, -7, -87, -59, -51, -14, -6, 70, 5, 52, -127, 63, -100, 83, 73, 54}, v8_int16 = {-1569, -14935, -3379, 18170, 13317, 16257, 21404, 13897}, v4_int32 = {-978716193, 1190851277, 1065432069, 910775196}, v2_int64 = {5114667292431088095, 3911749681893422085}, uint128 = 72159045262302707577450683077612927455}

(gdb) print $xmm2
$3 = {v4_float = {12.3400002, 2345.54297, -3493.19995, 0.449010015}, v2_double = {5.6204289471764299e+24, 1.0439462282443856e-05}, v16_int8 = {-92, 112, 69, 65, -80, -104, 18, 69, 51, 83, 90, -59, -92, -28, -27, 62}, v8_int16 = {28836, 16709, -26448, 17682, 21299, -15014, -7004, 16101}, v4_int32 = {1095069860, 1158846640, -983936205, 1055253668}, v2_int64 = {4977208420974555300, 4532279996355072819}, uint128 = 83605809163155287727927076236493680804}



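(gdb) x/4f &data # NOTE the use of " f ": 4f = four 4-byte single-precision floats = 16 bytes
0x403000 : 0 0 0 0 # still zero: after " s " gdb shows the NEXT line to execute, so line 14 ( movups %xmm0, data ) has only been displayed, not yet executed
(gdb) s
15 movl $1, %eax # this step executes line 14, so the store to data has now happened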
(gdb) x/4f &data
0x403000 : 12.3400002 2345.54297 -3493.19995 0.449010015 (gdb)
(gdb) x/4f &value1
0x402000 : 12.3400002 2345.54297 -3493.19995 0.449010015
(gdb) x/16b &value1
0x402000 : -92 112 69 65 -80 -104 18 69
0x402008 : 51 83 90 -59 -92 -28 -27 62
(gdb) x/16b &data
0x403000 : -92 112 69 65 -80 -104 18 69
0x403008 : 51 83 90 -59 -92 -28 -27 62
(gdb) x/16x &value1
0x402000 : 0xa4 0x70 0x45 0x41 0xb0 0x98 0x12 0x45
0x402008 : 0x33 0x53 0x5a 0xc5 0xa4 0xe4 0xe5 0x3e
(gdb) x/16x &data
0x403000 : 0xa4 0x70 0x45 0x41 0xb0 0x98 0x12 0x45
0x403008 : 0x33 0x53 0x5a 0xc5 0xa4 0xe4 0xe5 0x3e



Home Page