Two questions:
1) I am using an AMD64 Opteron processor and I am trying to count executed instructions. I've done all the proper setup code-wise, but the instruction count I receive for the function I'm measuring is double what I expect. The function is written in assembly:
# 4 4 4
#-----------------------------------------------------------------------
# mxmul -- fully unrolled 4x4 double-precision matrix product.
#
# Syntax: GNU as, AT&T operand order (src, dst). ELF target.
# ABI:    presumably SysV AMD64 (pointers arrive in rdi, rsi, rdx) -- confirm
#         against the C prototype used by the caller.
#
# In:     rdi = b    16 doubles, row stride 0x20 bytes; loaded with movupd,
#                    so no alignment requirement
#         rsi = a    16 doubles, row stride 0x20 bytes; every element is
#                    broadcast to both lanes with movddup (needs SSE3)
#         rdx = out  16 doubles, row stride 0x20 bytes; written with movapd,
#                    so it MUST be 16-byte aligned or the store faults
# Out:    out[4*i + j] = sum over k of a[4*i + k] * b[4*k + j],  i, j in 0..3
# Clobb:  xmm0-xmm15 (no integer registers, no stack)
#
# Aliasing: out must not overlap a -- pass 2 re-reads all of a after pass 1
#           has already stored half of out.
#
# Layout: two passes, each producing two adjacent result columns.  Within a
#         pass the k = 0,1 partial sums are built first, then the k = 2,3
#         partial sums, and the two halves are added just before the store.
#-----------------------------------------------------------------------
        .section .text
        .p2align 4, 0x90
        .globl  mxmul
        .type   mxmul, @function
mxmul:
        # ================= Pass 1: result columns 0..1 =================

        # -- partial sums for k = 0, 1 ---------------------------------
        movddup 0x00(%rsi), %xmm0               # a[0][0] in both lanes
        movupd  0x00(%rdi), %xmm1               # xmm1 = b[0][0..1]
        mulpd   %xmm1, %xmm0
        movddup 0x08(%rsi), %xmm2               # a[0][1]
        movupd  0x20(%rdi), %xmm3               # xmm3 = b[1][0..1]
        mulpd   %xmm3, %xmm2
        addpd   %xmm0, %xmm2                    # xmm2 = row 0, k = 0..1

        movddup 0x20(%rsi), %xmm4               # a[1][0]
        mulpd   %xmm1, %xmm4
        movddup 0x28(%rsi), %xmm5               # a[1][1]
        mulpd   %xmm3, %xmm5
        addpd   %xmm4, %xmm5                    # xmm5 = row 1, k = 0..1

        movddup 0x40(%rsi), %xmm6               # a[2][0]
        mulpd   %xmm1, %xmm6
        movddup 0x48(%rsi), %xmm7               # a[2][1]
        mulpd   %xmm3, %xmm7
        addpd   %xmm6, %xmm7                    # xmm7 = row 2, k = 0..1

        movddup 0x60(%rsi), %xmm8               # a[3][0]
        mulpd   %xmm1, %xmm8
        movddup 0x68(%rsi), %xmm9               # a[3][1]
        mulpd   %xmm3, %xmm9
        addpd   %xmm8, %xmm9                    # xmm9 = row 3, k = 0..1

        # -- partial sums for k = 2, 3 ---------------------------------
        movddup 0x10(%rsi), %xmm0               # a[0][2]
        movupd  0x40(%rdi), %xmm1               # xmm1 = b[2][0..1]
        mulpd   %xmm1, %xmm0
        movddup 0x18(%rsi), %xmm10              # a[0][3]
        movupd  0x60(%rdi), %xmm3               # xmm3 = b[3][0..1]
        mulpd   %xmm3, %xmm10
        addpd   %xmm0, %xmm10                   # xmm10 = row 0, k = 2..3

        movddup 0x30(%rsi), %xmm4               # a[1][2]
        mulpd   %xmm1, %xmm4
        movddup 0x38(%rsi), %xmm11              # a[1][3]
        mulpd   %xmm3, %xmm11
        addpd   %xmm4, %xmm11                   # xmm11 = row 1, k = 2..3

        movddup 0x50(%rsi), %xmm12              # a[2][2]
        mulpd   %xmm1, %xmm12
        movddup 0x58(%rsi), %xmm13              # a[2][3]
        mulpd   %xmm3, %xmm13
        addpd   %xmm12, %xmm13                  # xmm13 = row 2, k = 2..3

        movddup 0x70(%rsi), %xmm14              # a[3][2]
        mulpd   %xmm1, %xmm14
        movddup 0x78(%rsi), %xmm15              # a[3][3]
        mulpd   %xmm3, %xmm15
        addpd   %xmm14, %xmm15                  # xmm15 = row 3, k = 2..3

        # -- combine both halves and store out[i][0..1] ----------------
        addpd   %xmm2, %xmm10
        movapd  %xmm10, 0x00(%rdx)              # out[0][0..1]
        addpd   %xmm5, %xmm11
        movapd  %xmm11, 0x20(%rdx)              # out[1][0..1]
        addpd   %xmm7, %xmm13
        movapd  %xmm13, 0x40(%rdx)              # out[2][0..1]
        addpd   %xmm9, %xmm15
        movapd  %xmm15, 0x60(%rdx)              # out[3][0..1]

        # ================= Pass 2: result columns 2..3 =================
        # Same schedule as pass 1; only the b and out offsets move by
        # 0x10 bytes (two doubles).  The a elements are broadcast again.

        # -- partial sums for k = 0, 1 ---------------------------------
        movddup 0x00(%rsi), %xmm0               # a[0][0]
        movupd  0x10(%rdi), %xmm1               # xmm1 = b[0][2..3]
        mulpd   %xmm1, %xmm0
        movddup 0x08(%rsi), %xmm2               # a[0][1]
        movupd  0x30(%rdi), %xmm3               # xmm3 = b[1][2..3]
        mulpd   %xmm3, %xmm2
        addpd   %xmm0, %xmm2                    # xmm2 = row 0, k = 0..1

        movddup 0x20(%rsi), %xmm4               # a[1][0]
        mulpd   %xmm1, %xmm4
        movddup 0x28(%rsi), %xmm5               # a[1][1]
        mulpd   %xmm3, %xmm5
        addpd   %xmm4, %xmm5                    # xmm5 = row 1, k = 0..1

        movddup 0x40(%rsi), %xmm6               # a[2][0]
        mulpd   %xmm1, %xmm6
        movddup 0x48(%rsi), %xmm7               # a[2][1]
        mulpd   %xmm3, %xmm7
        addpd   %xmm6, %xmm7                    # xmm7 = row 2, k = 0..1

        movddup 0x60(%rsi), %xmm8               # a[3][0]
        mulpd   %xmm1, %xmm8
        movddup 0x68(%rsi), %xmm9               # a[3][1]
        mulpd   %xmm3, %xmm9
        addpd   %xmm8, %xmm9                    # xmm9 = row 3, k = 0..1

        # -- partial sums for k = 2, 3 ---------------------------------
        movddup 0x10(%rsi), %xmm0               # a[0][2]
        movupd  0x50(%rdi), %xmm1               # xmm1 = b[2][2..3]
        mulpd   %xmm1, %xmm0
        movddup 0x18(%rsi), %xmm10              # a[0][3]
        movupd  0x70(%rdi), %xmm3               # xmm3 = b[3][2..3]
        mulpd   %xmm3, %xmm10
        addpd   %xmm0, %xmm10                   # xmm10 = row 0, k = 2..3

        movddup 0x30(%rsi), %xmm4               # a[1][2]
        mulpd   %xmm1, %xmm4
        movddup 0x38(%rsi), %xmm11              # a[1][3]
        mulpd   %xmm3, %xmm11
        addpd   %xmm4, %xmm11                   # xmm11 = row 1, k = 2..3

        movddup 0x50(%rsi), %xmm12              # a[2][2]
        mulpd   %xmm1, %xmm12
        movddup 0x58(%rsi), %xmm13              # a[2][3]
        mulpd   %xmm3, %xmm13
        addpd   %xmm12, %xmm13                  # xmm13 = row 2, k = 2..3

        movddup 0x70(%rsi), %xmm14              # a[3][2]
        mulpd   %xmm1, %xmm14
        movddup 0x78(%rsi), %xmm15              # a[3][3]
        mulpd   %xmm3, %xmm15
        addpd   %xmm14, %xmm15                  # xmm15 = row 3, k = 2..3

        # -- combine both halves and store out[i][2..3] ----------------
        addpd   %xmm2, %xmm10
        movapd  %xmm10, 0x10(%rdx)              # out[0][2..3]
        addpd   %xmm5, %xmm11
        movapd  %xmm11, 0x30(%rdx)              # out[1][2..3]
        addpd   %xmm7, %xmm13
        movapd  %xmm13, 0x50(%rdx)              # out[2][2..3]
        addpd   %xmm9, %xmm15
        movapd  %xmm15, 0x70(%rdx)              # out[3][2..3]

        retq
        .size   mxmul, .-mxmul
        .p2align 4, 0x90
Can you tell me why the count I get is double what I expect?
2) Branching from the first question, I tried counting the Vector/SIMD instructions executed, hoping I'd get a more accurate number, but I receive 0 every time. Can you explain any of this?
