PAPI 7.1.0.0
Loading...
Searching...
No Matches
vec_scalar_verify.c File Reference
Include dependency graph for vec_scalar_verify.c:

Go to the source code of this file.

Functions

void papi_stop_and_print_placeholder (long long theory, FILE *fp)
 
void papi_stop_and_print (long long theory, int EventSet, FILE *fp)
 
float test_hp_scalar_VEC_24 (uint64 iterations)
 
float test_hp_scalar_VEC_48 (uint64 iterations)
 
float test_hp_scalar_VEC_96 (uint64 iterations)
 
float test_sp_scalar_VEC_24 (uint64 iterations)
 
float test_sp_scalar_VEC_48 (uint64 iterations)
 
float test_sp_scalar_VEC_96 (uint64 iterations)
 
double test_dp_scalar_VEC_24 (uint64 iterations)
 
double test_dp_scalar_VEC_48 (uint64 iterations)
 
double test_dp_scalar_VEC_96 (uint64 iterations)
 
float test_hp_scalar_VEC_FMA_12 (uint64 iterations)
 
float test_hp_scalar_VEC_FMA_24 (uint64 iterations)
 
float test_hp_scalar_VEC_FMA_48 (uint64 iterations)
 
float test_sp_scalar_VEC_FMA_12 (uint64 iterations)
 
float test_sp_scalar_VEC_FMA_24 (uint64 iterations)
 
float test_sp_scalar_VEC_FMA_48 (uint64 iterations)
 
double test_dp_scalar_VEC_FMA_12 (uint64 iterations)
 
double test_dp_scalar_VEC_FMA_24 (uint64 iterations)
 
double test_dp_scalar_VEC_FMA_48 (uint64 iterations)
 

Function Documentation

◆ papi_stop_and_print()

void papi_stop_and_print ( long long  theory,
int  EventSet,
FILE *  fp 
)

Definition at line 8 of file vec_scalar_verify.c.

9{
10 long long flpins = 0;
11 int retval;
12
13 if ( (retval=PAPI_stop(EventSet, &flpins)) != PAPI_OK){
14 fprintf(stderr, "Problem.\n");
15 return;
16 }
17
18 fprintf(fp, "%lld %lld\n", theory, flpins);
19}
PAPI_stop(): Stop counting hardware events in an event set.
#define PAPI_OK
Definition: f90papi.h:73
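Note: the cross-references below appear to be resolved by name to same-named symbols in other files. Within papi_stop_and_print() itself, EventSet and fp are function parameters and retval is a local variable.
static int EventSet
Definition: init_fini.c:8
FILE * stderr
static FILE * fp
int retval
Definition: zero_fork.c:53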
Here is the caller graph for this function:

◆ papi_stop_and_print_placeholder()

void papi_stop_and_print_placeholder ( long long  theory,
FILE *  fp 
)

Definition at line 3 of file vec_scalar_verify.c.

4{
5 fprintf(fp, "%lld 0\n", theory);
6}
Here is the caller graph for this function:

◆ test_dp_scalar_VEC_24()

double test_dp_scalar_VEC_24 ( uint64  iterations)

Definition at line 742 of file vec_scalar_verify.c.

742 {
743 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
744
745 /* Generate starting data */
746 r0 = SET_VEC_SD(0.01);
747 r1 = SET_VEC_SD(0.02);
748 r2 = SET_VEC_SD(0.03);
749 r3 = SET_VEC_SD(0.04);
750 r4 = SET_VEC_SD(0.05);
751 r5 = SET_VEC_SD(0.06);
752 r6 = SET_VEC_SD(0.07);
753 r7 = SET_VEC_SD(0.08);
754 r8 = SET_VEC_SD(0.09);
755 r9 = SET_VEC_SD(0.10);
756 rA = SET_VEC_SD(0.11);
757 rB = SET_VEC_SD(0.12);
758 rC = SET_VEC_SD(0.13);
759 rD = SET_VEC_SD(0.14);
760 rE = SET_VEC_SD(0.15);
761 rF = SET_VEC_SD(0.16);
762
763 uint64 c = 0;
764 while (c < iterations){
765 size_t i = 0;
766 while (i < 1000){
767
768 /* The performance critical part */
769 r0 = MUL_VEC_SD(r0,rC);
770 r1 = ADD_VEC_SD(r1,rD);
771 r2 = MUL_VEC_SD(r2,rE);
772 r3 = ADD_VEC_SD(r3,rF);
773 r4 = MUL_VEC_SD(r4,rC);
774 r5 = ADD_VEC_SD(r5,rD);
775 r6 = MUL_VEC_SD(r6,rE);
776 r7 = ADD_VEC_SD(r7,rF);
777 r8 = MUL_VEC_SD(r8,rC);
778 r9 = ADD_VEC_SD(r9,rD);
779 rA = MUL_VEC_SD(rA,rE);
780 rB = ADD_VEC_SD(rB,rF);
781
782 r0 = ADD_VEC_SD(r0,rF);
783 r1 = MUL_VEC_SD(r1,rE);
784 r2 = ADD_VEC_SD(r2,rD);
785 r3 = MUL_VEC_SD(r3,rC);
786 r4 = ADD_VEC_SD(r4,rF);
787 r5 = MUL_VEC_SD(r5,rE);
788 r6 = ADD_VEC_SD(r6,rD);
789 r7 = MUL_VEC_SD(r7,rC);
790 r8 = ADD_VEC_SD(r8,rF);
791 r9 = MUL_VEC_SD(r9,rE);
792 rA = ADD_VEC_SD(rA,rD);
793 rB = MUL_VEC_SD(rB,rC);
794
795 i++;
796 }
797 c++;
798 }
799
800 /* Use data so that compiler does not eliminate it when using -O2 */
801 r0 = ADD_VEC_SD(r0,r1);
802 r2 = ADD_VEC_SD(r2,r3);
803 r4 = ADD_VEC_SD(r4,r5);
804 r6 = ADD_VEC_SD(r6,r7);
805 r8 = ADD_VEC_SD(r8,r9);
806 rA = ADD_VEC_SD(rA,rB);
807
808 r0 = ADD_VEC_SD(r0,r2);
809 r4 = ADD_VEC_SD(r4,r6);
810 r8 = ADD_VEC_SD(r8,rA);
811
812 r0 = ADD_VEC_SD(r0,r4);
813 r0 = ADD_VEC_SD(r0,r8);
814
815 double out = 0;
816 DP_SCALAR_TYPE temp = r0;
817 out += ((double*)&temp)[0];
818
819 return out;
820}
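Note: the cross-references below appear to be resolved by name to same-named symbols in other files. Within this function, i is a local size_t inner-loop counter and c is a local uint64 outer-loop counter, not the matrix from libmsr_basic.c.
int i
unsigned long long uint64
Definition: cat_arch.h:3
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40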
Here is the caller graph for this function:

◆ test_dp_scalar_VEC_48()

double test_dp_scalar_VEC_48 ( uint64  iterations)

Definition at line 825 of file vec_scalar_verify.c.

825 {
826 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
827
828 /* Generate starting data */
829 r0 = SET_VEC_SD(0.01);
830 r1 = SET_VEC_SD(0.02);
831 r2 = SET_VEC_SD(0.03);
832 r3 = SET_VEC_SD(0.04);
833 r4 = SET_VEC_SD(0.05);
834 r5 = SET_VEC_SD(0.06);
835 r6 = SET_VEC_SD(0.07);
836 r7 = SET_VEC_SD(0.08);
837 r8 = SET_VEC_SD(0.09);
838 r9 = SET_VEC_SD(0.10);
839 rA = SET_VEC_SD(0.11);
840 rB = SET_VEC_SD(0.12);
841 rC = SET_VEC_SD(0.13);
842 rD = SET_VEC_SD(0.14);
843 rE = SET_VEC_SD(0.15);
844 rF = SET_VEC_SD(0.16);
845
846 uint64 c = 0;
847 while (c < iterations){
848 size_t i = 0;
849 while (i < 1000){
850
851 /* The performance critical part */
852 r0 = MUL_VEC_SD(r0,rC);
853 r1 = ADD_VEC_SD(r1,rD);
854 r2 = MUL_VEC_SD(r2,rE);
855 r3 = ADD_VEC_SD(r3,rF);
856 r4 = MUL_VEC_SD(r4,rC);
857 r5 = ADD_VEC_SD(r5,rD);
858 r6 = MUL_VEC_SD(r6,rE);
859 r7 = ADD_VEC_SD(r7,rF);
860 r8 = MUL_VEC_SD(r8,rC);
861 r9 = ADD_VEC_SD(r9,rD);
862 rA = MUL_VEC_SD(rA,rE);
863 rB = ADD_VEC_SD(rB,rF);
864
865 r0 = ADD_VEC_SD(r0,rF);
866 r1 = MUL_VEC_SD(r1,rE);
867 r2 = ADD_VEC_SD(r2,rD);
868 r3 = MUL_VEC_SD(r3,rC);
869 r4 = ADD_VEC_SD(r4,rF);
870 r5 = MUL_VEC_SD(r5,rE);
871 r6 = ADD_VEC_SD(r6,rD);
872 r7 = MUL_VEC_SD(r7,rC);
873 r8 = ADD_VEC_SD(r8,rF);
874 r9 = MUL_VEC_SD(r9,rE);
875 rA = ADD_VEC_SD(rA,rD);
876 rB = MUL_VEC_SD(rB,rC);
877
878 r0 = MUL_VEC_SD(r0,rC);
879 r1 = ADD_VEC_SD(r1,rD);
880 r2 = MUL_VEC_SD(r2,rE);
881 r3 = ADD_VEC_SD(r3,rF);
882 r4 = MUL_VEC_SD(r4,rC);
883 r5 = ADD_VEC_SD(r5,rD);
884 r6 = MUL_VEC_SD(r6,rE);
885 r7 = ADD_VEC_SD(r7,rF);
886 r8 = MUL_VEC_SD(r8,rC);
887 r9 = ADD_VEC_SD(r9,rD);
888 rA = MUL_VEC_SD(rA,rE);
889 rB = ADD_VEC_SD(rB,rF);
890
891 r0 = ADD_VEC_SD(r0,rF);
892 r1 = MUL_VEC_SD(r1,rE);
893 r2 = ADD_VEC_SD(r2,rD);
894 r3 = MUL_VEC_SD(r3,rC);
895 r4 = ADD_VEC_SD(r4,rF);
896 r5 = MUL_VEC_SD(r5,rE);
897 r6 = ADD_VEC_SD(r6,rD);
898 r7 = MUL_VEC_SD(r7,rC);
899 r8 = ADD_VEC_SD(r8,rF);
900 r9 = MUL_VEC_SD(r9,rE);
901 rA = ADD_VEC_SD(rA,rD);
902 rB = MUL_VEC_SD(rB,rC);
903
904 i++;
905 }
906 c++;
907 }
908
909 /* Use data so that compiler does not eliminate it when using -O2 */
910 r0 = ADD_VEC_SD(r0,r1);
911 r2 = ADD_VEC_SD(r2,r3);
912 r4 = ADD_VEC_SD(r4,r5);
913 r6 = ADD_VEC_SD(r6,r7);
914 r8 = ADD_VEC_SD(r8,r9);
915 rA = ADD_VEC_SD(rA,rB);
916
917 r0 = ADD_VEC_SD(r0,r2);
918 r4 = ADD_VEC_SD(r4,r6);
919 r8 = ADD_VEC_SD(r8,rA);
920
921 r0 = ADD_VEC_SD(r0,r4);
922 r0 = ADD_VEC_SD(r0,r8);
923
924 double out = 0;
925 DP_SCALAR_TYPE temp = r0;
926 out += ((double*)&temp)[0];
927
928 return out;
929}
Here is the caller graph for this function:

◆ test_dp_scalar_VEC_96()

double test_dp_scalar_VEC_96 ( uint64  iterations)

Definition at line 934 of file vec_scalar_verify.c.

934 {
935 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
936
937 /* Generate starting data */
938 r0 = SET_VEC_SD(0.01);
939 r1 = SET_VEC_SD(0.02);
940 r2 = SET_VEC_SD(0.03);
941 r3 = SET_VEC_SD(0.04);
942 r4 = SET_VEC_SD(0.05);
943 r5 = SET_VEC_SD(0.06);
944 r6 = SET_VEC_SD(0.07);
945 r7 = SET_VEC_SD(0.08);
946 r8 = SET_VEC_SD(0.09);
947 r9 = SET_VEC_SD(0.10);
948 rA = SET_VEC_SD(0.11);
949 rB = SET_VEC_SD(0.12);
950 rC = SET_VEC_SD(0.13);
951 rD = SET_VEC_SD(0.14);
952 rE = SET_VEC_SD(0.15);
953 rF = SET_VEC_SD(0.16);
954
955 uint64 c = 0;
956 while (c < iterations){
957 size_t i = 0;
958 while (i < 1000){
959
960 /* The performance critical part */
961 r0 = MUL_VEC_SD(r0,rC);
962 r1 = ADD_VEC_SD(r1,rD);
963 r2 = MUL_VEC_SD(r2,rE);
964 r3 = ADD_VEC_SD(r3,rF);
965 r4 = MUL_VEC_SD(r4,rC);
966 r5 = ADD_VEC_SD(r5,rD);
967 r6 = MUL_VEC_SD(r6,rE);
968 r7 = ADD_VEC_SD(r7,rF);
969 r8 = MUL_VEC_SD(r8,rC);
970 r9 = ADD_VEC_SD(r9,rD);
971 rA = MUL_VEC_SD(rA,rE);
972 rB = ADD_VEC_SD(rB,rF);
973
974 r0 = ADD_VEC_SD(r0,rF);
975 r1 = MUL_VEC_SD(r1,rE);
976 r2 = ADD_VEC_SD(r2,rD);
977 r3 = MUL_VEC_SD(r3,rC);
978 r4 = ADD_VEC_SD(r4,rF);
979 r5 = MUL_VEC_SD(r5,rE);
980 r6 = ADD_VEC_SD(r6,rD);
981 r7 = MUL_VEC_SD(r7,rC);
982 r8 = ADD_VEC_SD(r8,rF);
983 r9 = MUL_VEC_SD(r9,rE);
984 rA = ADD_VEC_SD(rA,rD);
985 rB = MUL_VEC_SD(rB,rC);
986
987 r0 = MUL_VEC_SD(r0,rC);
988 r1 = ADD_VEC_SD(r1,rD);
989 r2 = MUL_VEC_SD(r2,rE);
990 r3 = ADD_VEC_SD(r3,rF);
991 r4 = MUL_VEC_SD(r4,rC);
992 r5 = ADD_VEC_SD(r5,rD);
993 r6 = MUL_VEC_SD(r6,rE);
994 r7 = ADD_VEC_SD(r7,rF);
995 r8 = MUL_VEC_SD(r8,rC);
996 r9 = ADD_VEC_SD(r9,rD);
997 rA = MUL_VEC_SD(rA,rE);
998 rB = ADD_VEC_SD(rB,rF);
999
1000 r0 = ADD_VEC_SD(r0,rF);
1001 r1 = MUL_VEC_SD(r1,rE);
1002 r2 = ADD_VEC_SD(r2,rD);
1003 r3 = MUL_VEC_SD(r3,rC);
1004 r4 = ADD_VEC_SD(r4,rF);
1005 r5 = MUL_VEC_SD(r5,rE);
1006 r6 = ADD_VEC_SD(r6,rD);
1007 r7 = MUL_VEC_SD(r7,rC);
1008 r8 = ADD_VEC_SD(r8,rF);
1009 r9 = MUL_VEC_SD(r9,rE);
1010 rA = ADD_VEC_SD(rA,rD);
1011 rB = MUL_VEC_SD(rB,rC);
1012
1013 r0 = MUL_VEC_SD(r0,rC);
1014 r1 = ADD_VEC_SD(r1,rD);
1015 r2 = MUL_VEC_SD(r2,rE);
1016 r3 = ADD_VEC_SD(r3,rF);
1017 r4 = MUL_VEC_SD(r4,rC);
1018 r5 = ADD_VEC_SD(r5,rD);
1019 r6 = MUL_VEC_SD(r6,rE);
1020 r7 = ADD_VEC_SD(r7,rF);
1021 r8 = MUL_VEC_SD(r8,rC);
1022 r9 = ADD_VEC_SD(r9,rD);
1023 rA = MUL_VEC_SD(rA,rE);
1024 rB = ADD_VEC_SD(rB,rF);
1025
1026 r0 = ADD_VEC_SD(r0,rF);
1027 r1 = MUL_VEC_SD(r1,rE);
1028 r2 = ADD_VEC_SD(r2,rD);
1029 r3 = MUL_VEC_SD(r3,rC);
1030 r4 = ADD_VEC_SD(r4,rF);
1031 r5 = MUL_VEC_SD(r5,rE);
1032 r6 = ADD_VEC_SD(r6,rD);
1033 r7 = MUL_VEC_SD(r7,rC);
1034 r8 = ADD_VEC_SD(r8,rF);
1035 r9 = MUL_VEC_SD(r9,rE);
1036 rA = ADD_VEC_SD(rA,rD);
1037 rB = MUL_VEC_SD(rB,rC);
1038
1039 r0 = MUL_VEC_SD(r0,rC);
1040 r1 = ADD_VEC_SD(r1,rD);
1041 r2 = MUL_VEC_SD(r2,rE);
1042 r3 = ADD_VEC_SD(r3,rF);
1043 r4 = MUL_VEC_SD(r4,rC);
1044 r5 = ADD_VEC_SD(r5,rD);
1045 r6 = MUL_VEC_SD(r6,rE);
1046 r7 = ADD_VEC_SD(r7,rF);
1047 r8 = MUL_VEC_SD(r8,rC);
1048 r9 = ADD_VEC_SD(r9,rD);
1049 rA = MUL_VEC_SD(rA,rE);
1050 rB = ADD_VEC_SD(rB,rF);
1051
1052 r0 = ADD_VEC_SD(r0,rF);
1053 r1 = MUL_VEC_SD(r1,rE);
1054 r2 = ADD_VEC_SD(r2,rD);
1055 r3 = MUL_VEC_SD(r3,rC);
1056 r4 = ADD_VEC_SD(r4,rF);
1057 r5 = MUL_VEC_SD(r5,rE);
1058 r6 = ADD_VEC_SD(r6,rD);
1059 r7 = MUL_VEC_SD(r7,rC);
1060 r8 = ADD_VEC_SD(r8,rF);
1061 r9 = MUL_VEC_SD(r9,rE);
1062 rA = ADD_VEC_SD(rA,rD);
1063 rB = MUL_VEC_SD(rB,rC);
1064
1065 i++;
1066 }
1067 c++;
1068 }
1069
1070 /* Use data so that compiler does not eliminate it when using -O2 */
1071 r0 = ADD_VEC_SD(r0,r1);
1072 r2 = ADD_VEC_SD(r2,r3);
1073 r4 = ADD_VEC_SD(r4,r5);
1074 r6 = ADD_VEC_SD(r6,r7);
1075 r8 = ADD_VEC_SD(r8,r9);
1076 rA = ADD_VEC_SD(rA,rB);
1077
1078 r0 = ADD_VEC_SD(r0,r2);
1079 r4 = ADD_VEC_SD(r4,r6);
1080 r8 = ADD_VEC_SD(r8,rA);
1081
1082 r0 = ADD_VEC_SD(r0,r4);
1083 r0 = ADD_VEC_SD(r0,r8);
1084
1085 double out = 0;
1086 DP_SCALAR_TYPE temp = r0;
1087 out += ((double*)&temp)[0];
1088
1089 return out;
1090}
Here is the caller graph for this function:

◆ test_dp_scalar_VEC_FMA_12()

double test_dp_scalar_VEC_FMA_12 ( uint64  iterations)

Definition at line 1615 of file vec_scalar_verify.c.

1615 {
1616 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
1617
1618 /* Generate starting data */
1619 r0 = SET_VEC_SD(0.01);
1620 r1 = SET_VEC_SD(0.02);
1621 r2 = SET_VEC_SD(0.03);
1622 r3 = SET_VEC_SD(0.04);
1623 r4 = SET_VEC_SD(0.05);
1624 r5 = SET_VEC_SD(0.06);
1625 r6 = SET_VEC_SD(0.07);
1626 r7 = SET_VEC_SD(0.08);
1627 r8 = SET_VEC_SD(0.09);
1628 r9 = SET_VEC_SD(0.10);
1629 rA = SET_VEC_SD(0.11);
1630 rB = SET_VEC_SD(0.12);
1631 rC = SET_VEC_SD(0.13);
1632 rD = SET_VEC_SD(0.14);
1633 rE = SET_VEC_SD(0.15);
1634 rF = SET_VEC_SD(0.16);
1635
1636 uint64 c = 0;
1637 while (c < iterations){
1638 size_t i = 0;
1639 while (i < 1000){
1640
1641 /* The performance critical part */
1642 FMA_VEC_SD(r0,r0,r7,r9);
1643 FMA_VEC_SD(r1,r1,r8,rA);
1644 FMA_VEC_SD(r2,r2,r9,rB);
1645 FMA_VEC_SD(r3,r3,rA,rC);
1646 FMA_VEC_SD(r4,r4,rB,rD);
1647 FMA_VEC_SD(r5,r5,rC,rE);
1648
1649 FMA_VEC_SD(r0,r0,rD,rF);
1650 FMA_VEC_SD(r1,r1,rC,rE);
1651 FMA_VEC_SD(r2,r2,rB,rD);
1652 FMA_VEC_SD(r3,r3,rA,rC);
1653 FMA_VEC_SD(r4,r4,r9,rB);
1654 FMA_VEC_SD(r5,r5,r8,rA);
1655
1656 i++;
1657 }
1658 c++;
1659 }
1660
1661 /* Use data so that compiler does not eliminate it when using -O2 */
1662 r0 = ADD_VEC_SD(r0,r1);
1663 r2 = ADD_VEC_SD(r2,r3);
1664 r4 = ADD_VEC_SD(r4,r5);
1665
1666 r0 = ADD_VEC_SD(r0,r6);
1667 r2 = ADD_VEC_SD(r2,r4);
1668
1669 r0 = ADD_VEC_SD(r0,r2);
1670
1671 double out = 0;
1672 DP_SCALAR_TYPE temp = r0;
1673 out += ((double*)&temp)[0];
1674
1675 return out;
1676}
Here is the caller graph for this function:

◆ test_dp_scalar_VEC_FMA_24()

double test_dp_scalar_VEC_FMA_24 ( uint64  iterations)

Definition at line 1681 of file vec_scalar_verify.c.

1681 {
1682 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
1683
1684 /* Generate starting data */
1685 r0 = SET_VEC_SD(0.01);
1686 r1 = SET_VEC_SD(0.02);
1687 r2 = SET_VEC_SD(0.03);
1688 r3 = SET_VEC_SD(0.04);
1689 r4 = SET_VEC_SD(0.05);
1690 r5 = SET_VEC_SD(0.06);
1691 r6 = SET_VEC_SD(0.07);
1692 r7 = SET_VEC_SD(0.08);
1693 r8 = SET_VEC_SD(0.09);
1694 r9 = SET_VEC_SD(0.10);
1695 rA = SET_VEC_SD(0.11);
1696 rB = SET_VEC_SD(0.12);
1697 rC = SET_VEC_SD(0.13);
1698 rD = SET_VEC_SD(0.14);
1699 rE = SET_VEC_SD(0.15);
1700 rF = SET_VEC_SD(0.16);
1701
1702 uint64 c = 0;
1703 while (c < iterations){
1704 size_t i = 0;
1705 while (i < 1000){
1706
1707 /* The performance critical part */
1708 FMA_VEC_SD(r0,r0,r7,r9);
1709 FMA_VEC_SD(r1,r1,r8,rA);
1710 FMA_VEC_SD(r2,r2,r9,rB);
1711 FMA_VEC_SD(r3,r3,rA,rC);
1712 FMA_VEC_SD(r4,r4,rB,rD);
1713 FMA_VEC_SD(r5,r5,rC,rE);
1714
1715 FMA_VEC_SD(r0,r0,rD,rF);
1716 FMA_VEC_SD(r1,r1,rC,rE);
1717 FMA_VEC_SD(r2,r2,rB,rD);
1718 FMA_VEC_SD(r3,r3,rA,rC);
1719 FMA_VEC_SD(r4,r4,r9,rB);
1720 FMA_VEC_SD(r5,r5,r8,rA);
1721
1722 FMA_VEC_SD(r0,r0,r7,r9);
1723 FMA_VEC_SD(r1,r1,r8,rA);
1724 FMA_VEC_SD(r2,r2,r9,rB);
1725 FMA_VEC_SD(r3,r3,rA,rC);
1726 FMA_VEC_SD(r4,r4,rB,rD);
1727 FMA_VEC_SD(r5,r5,rC,rE);
1728
1729 FMA_VEC_SD(r0,r0,rD,rF);
1730 FMA_VEC_SD(r1,r1,rC,rE);
1731 FMA_VEC_SD(r2,r2,rB,rD);
1732 FMA_VEC_SD(r3,r3,rA,rC);
1733 FMA_VEC_SD(r4,r4,r9,rB);
1734 FMA_VEC_SD(r5,r5,r8,rA);
1735
1736 i++;
1737 }
1738 c++;
1739 }
1740
1741 /* Use data so that compiler does not eliminate it when using -O2 */
1742 r0 = ADD_VEC_SD(r0,r1);
1743 r2 = ADD_VEC_SD(r2,r3);
1744 r4 = ADD_VEC_SD(r4,r5);
1745
1746 r0 = ADD_VEC_SD(r0,r6);
1747 r2 = ADD_VEC_SD(r2,r4);
1748
1749 r0 = ADD_VEC_SD(r0,r2);
1750
1751 double out = 0;
1752 DP_SCALAR_TYPE temp = r0;
1753 out += ((double*)&temp)[0];
1754
1755 return out;
1756}
Here is the caller graph for this function:

◆ test_dp_scalar_VEC_FMA_48()

double test_dp_scalar_VEC_FMA_48 ( uint64  iterations)

Definition at line 1761 of file vec_scalar_verify.c.

1761 {
1762 register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
1763
1764 /* Generate starting data */
1765 r0 = SET_VEC_SD(0.01);
1766 r1 = SET_VEC_SD(0.02);
1767 r2 = SET_VEC_SD(0.03);
1768 r3 = SET_VEC_SD(0.04);
1769 r4 = SET_VEC_SD(0.05);
1770 r5 = SET_VEC_SD(0.06);
1771 r6 = SET_VEC_SD(0.07);
1772 r7 = SET_VEC_SD(0.08);
1773 r8 = SET_VEC_SD(0.09);
1774 r9 = SET_VEC_SD(0.10);
1775 rA = SET_VEC_SD(0.11);
1776 rB = SET_VEC_SD(0.12);
1777 rC = SET_VEC_SD(0.13);
1778 rD = SET_VEC_SD(0.14);
1779 rE = SET_VEC_SD(0.15);
1780 rF = SET_VEC_SD(0.16);
1781
1782 uint64 c = 0;
1783 while (c < iterations){
1784 size_t i = 0;
1785 while (i < 1000){
1786
1787 /* The performance critical part */
1788 FMA_VEC_SD(r0,r0,r7,r9);
1789 FMA_VEC_SD(r1,r1,r8,rA);
1790 FMA_VEC_SD(r2,r2,r9,rB);
1791 FMA_VEC_SD(r3,r3,rA,rC);
1792 FMA_VEC_SD(r4,r4,rB,rD);
1793 FMA_VEC_SD(r5,r5,rC,rE);
1794
1795 FMA_VEC_SD(r0,r0,rD,rF);
1796 FMA_VEC_SD(r1,r1,rC,rE);
1797 FMA_VEC_SD(r2,r2,rB,rD);
1798 FMA_VEC_SD(r3,r3,rA,rC);
1799 FMA_VEC_SD(r4,r4,r9,rB);
1800 FMA_VEC_SD(r5,r5,r8,rA);
1801
1802 FMA_VEC_SD(r0,r0,r7,r9);
1803 FMA_VEC_SD(r1,r1,r8,rA);
1804 FMA_VEC_SD(r2,r2,r9,rB);
1805 FMA_VEC_SD(r3,r3,rA,rC);
1806 FMA_VEC_SD(r4,r4,rB,rD);
1807 FMA_VEC_SD(r5,r5,rC,rE);
1808
1809 FMA_VEC_SD(r0,r0,rD,rF);
1810 FMA_VEC_SD(r1,r1,rC,rE);
1811 FMA_VEC_SD(r2,r2,rB,rD);
1812 FMA_VEC_SD(r3,r3,rA,rC);
1813 FMA_VEC_SD(r4,r4,r9,rB);
1814 FMA_VEC_SD(r5,r5,r8,rA);
1815
1816 FMA_VEC_SD(r0,r0,r7,r9);
1817 FMA_VEC_SD(r1,r1,r8,rA);
1818 FMA_VEC_SD(r2,r2,r9,rB);
1819 FMA_VEC_SD(r3,r3,rA,rC);
1820 FMA_VEC_SD(r4,r4,rB,rD);
1821 FMA_VEC_SD(r5,r5,rC,rE);
1822
1823 FMA_VEC_SD(r0,r0,rD,rF);
1824 FMA_VEC_SD(r1,r1,rC,rE);
1825 FMA_VEC_SD(r2,r2,rB,rD);
1826 FMA_VEC_SD(r3,r3,rA,rC);
1827 FMA_VEC_SD(r4,r4,r9,rB);
1828 FMA_VEC_SD(r5,r5,r8,rA);
1829
1830 FMA_VEC_SD(r0,r0,r7,r9);
1831 FMA_VEC_SD(r1,r1,r8,rA);
1832 FMA_VEC_SD(r2,r2,r9,rB);
1833 FMA_VEC_SD(r3,r3,rA,rC);
1834 FMA_VEC_SD(r4,r4,rB,rD);
1835 FMA_VEC_SD(r5,r5,rC,rE);
1836
1837 FMA_VEC_SD(r0,r0,rD,rF);
1838 FMA_VEC_SD(r1,r1,rC,rE);
1839 FMA_VEC_SD(r2,r2,rB,rD);
1840 FMA_VEC_SD(r3,r3,rA,rC);
1841 FMA_VEC_SD(r4,r4,r9,rB);
1842 FMA_VEC_SD(r5,r5,r8,rA);
1843
1844 i++;
1845 }
1846 c++;
1847 }
1848
1849 /* Use data so that compiler does not eliminate it when using -O2 */
1850 r0 = ADD_VEC_SD(r0,r1);
1851 r2 = ADD_VEC_SD(r2,r3);
1852 r4 = ADD_VEC_SD(r4,r5);
1853
1854 r0 = ADD_VEC_SD(r0,r6);
1855 r2 = ADD_VEC_SD(r2,r4);
1856
1857 r0 = ADD_VEC_SD(r0,r2);
1858
1859 double out = 0;
1860 DP_SCALAR_TYPE temp = r0;
1861 out += ((double*)&temp)[0];
1862
1863 return out;
1864}
Here is the caller graph for this function:

◆ test_hp_scalar_VEC_24()

float test_hp_scalar_VEC_24 ( uint64  iterations)

Definition at line 367 of file vec_scalar_verify.c.

367 {
368
369 (void)iterations;
370 return 0.0;
371}
Here is the caller graph for this function:

◆ test_hp_scalar_VEC_48()

float test_hp_scalar_VEC_48 ( uint64  iterations)

Definition at line 373 of file vec_scalar_verify.c.

373 {
374
375 (void)iterations;
376 return 0.0;
377}
Here is the caller graph for this function:

◆ test_hp_scalar_VEC_96()

float test_hp_scalar_VEC_96 ( uint64  iterations)

Definition at line 379 of file vec_scalar_verify.c.

379 {
380
381 (void)iterations;
382 return 0.0;
383}
Here is the caller graph for this function:

◆ test_hp_scalar_VEC_FMA_12()

float test_hp_scalar_VEC_FMA_12 ( uint64  iterations)

Definition at line 1339 of file vec_scalar_verify.c.

1339 {
1340
1341 (void)iterations;
1342 return 0.0;
1343}
Here is the caller graph for this function:

◆ test_hp_scalar_VEC_FMA_24()

float test_hp_scalar_VEC_FMA_24 ( uint64  iterations)

Definition at line 1345 of file vec_scalar_verify.c.

1345 {
1346
1347 (void)iterations;
1348 return 0.0;
1349}
Here is the caller graph for this function:

◆ test_hp_scalar_VEC_FMA_48()

float test_hp_scalar_VEC_FMA_48 ( uint64  iterations)

Definition at line 1351 of file vec_scalar_verify.c.

1351 {
1352
1353 (void)iterations;
1354 return 0.0;
1355}
Here is the caller graph for this function:

◆ test_sp_scalar_VEC_24()

float test_sp_scalar_VEC_24 ( uint64  iterations)

Definition at line 389 of file vec_scalar_verify.c.

389 {
390 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
391
392 /* Generate starting data */
393 r0 = SET_VEC_SS(0.01);
394 r1 = SET_VEC_SS(0.02);
395 r2 = SET_VEC_SS(0.03);
396 r3 = SET_VEC_SS(0.04);
397 r4 = SET_VEC_SS(0.05);
398 r5 = SET_VEC_SS(0.06);
399 r6 = SET_VEC_SS(0.07);
400 r7 = SET_VEC_SS(0.08);
401 r8 = SET_VEC_SS(0.09);
402 r9 = SET_VEC_SS(0.10);
403 rA = SET_VEC_SS(0.11);
404 rB = SET_VEC_SS(0.12);
405 rC = SET_VEC_SS(0.13);
406 rD = SET_VEC_SS(0.14);
407 rE = SET_VEC_SS(0.15);
408 rF = SET_VEC_SS(0.16);
409
410 uint64 c = 0;
411 while (c < iterations){
412 size_t i = 0;
413 while (i < 1000){
414
415 /* The performance critical part */
416 r0 = MUL_VEC_SS(r0,rC);
417 r1 = ADD_VEC_SS(r1,rD);
418 r2 = MUL_VEC_SS(r2,rE);
419 r3 = ADD_VEC_SS(r3,rF);
420 r4 = MUL_VEC_SS(r4,rC);
421 r5 = ADD_VEC_SS(r5,rD);
422 r6 = MUL_VEC_SS(r6,rE);
423 r7 = ADD_VEC_SS(r7,rF);
424 r8 = MUL_VEC_SS(r8,rC);
425 r9 = ADD_VEC_SS(r9,rD);
426 rA = MUL_VEC_SS(rA,rE);
427 rB = ADD_VEC_SS(rB,rF);
428
429 r0 = ADD_VEC_SS(r0,rF);
430 r1 = MUL_VEC_SS(r1,rE);
431 r2 = ADD_VEC_SS(r2,rD);
432 r3 = MUL_VEC_SS(r3,rC);
433 r4 = ADD_VEC_SS(r4,rF);
434 r5 = MUL_VEC_SS(r5,rE);
435 r6 = ADD_VEC_SS(r6,rD);
436 r7 = MUL_VEC_SS(r7,rC);
437 r8 = ADD_VEC_SS(r8,rF);
438 r9 = MUL_VEC_SS(r9,rE);
439 rA = ADD_VEC_SS(rA,rD);
440 rB = MUL_VEC_SS(rB,rC);
441
442 i++;
443 }
444 c++;
445 }
446
447 /* Use data so that compiler does not eliminate it when using -O2 */
448 r0 = ADD_VEC_SS(r0,r1);
449 r2 = ADD_VEC_SS(r2,r3);
450 r4 = ADD_VEC_SS(r4,r5);
451 r6 = ADD_VEC_SS(r6,r7);
452 r8 = ADD_VEC_SS(r8,r9);
453 rA = ADD_VEC_SS(rA,rB);
454
455 r0 = ADD_VEC_SS(r0,r2);
456 r4 = ADD_VEC_SS(r4,r6);
457 r8 = ADD_VEC_SS(r8,rA);
458
459 r0 = ADD_VEC_SS(r0,r4);
460 r0 = ADD_VEC_SS(r0,r8);
461
462 float out = 0;
463 SP_SCALAR_TYPE temp = r0;
464 out += ((float*)&temp)[0];
465
466 return out;
467}
Here is the caller graph for this function:

◆ test_sp_scalar_VEC_48()

float test_sp_scalar_VEC_48 ( uint64  iterations)

Definition at line 472 of file vec_scalar_verify.c.

472 {
473 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
474
475 /* Generate starting data */
476 r0 = SET_VEC_SS(0.01);
477 r1 = SET_VEC_SS(0.02);
478 r2 = SET_VEC_SS(0.03);
479 r3 = SET_VEC_SS(0.04);
480 r4 = SET_VEC_SS(0.05);
481 r5 = SET_VEC_SS(0.06);
482 r6 = SET_VEC_SS(0.07);
483 r7 = SET_VEC_SS(0.08);
484 r8 = SET_VEC_SS(0.09);
485 r9 = SET_VEC_SS(0.10);
486 rA = SET_VEC_SS(0.11);
487 rB = SET_VEC_SS(0.12);
488 rC = SET_VEC_SS(0.13);
489 rD = SET_VEC_SS(0.14);
490 rE = SET_VEC_SS(0.15);
491 rF = SET_VEC_SS(0.16);
492
493 uint64 c = 0;
494 while (c < iterations){
495 size_t i = 0;
496 while (i < 1000){
497
498 /* The performance critical part */
499 r0 = MUL_VEC_SS(r0,rC);
500 r1 = ADD_VEC_SS(r1,rD);
501 r2 = MUL_VEC_SS(r2,rE);
502 r3 = ADD_VEC_SS(r3,rF);
503 r4 = MUL_VEC_SS(r4,rC);
504 r5 = ADD_VEC_SS(r5,rD);
505 r6 = MUL_VEC_SS(r6,rE);
506 r7 = ADD_VEC_SS(r7,rF);
507 r8 = MUL_VEC_SS(r8,rC);
508 r9 = ADD_VEC_SS(r9,rD);
509 rA = MUL_VEC_SS(rA,rE);
510 rB = ADD_VEC_SS(rB,rF);
511
512 r0 = ADD_VEC_SS(r0,rF);
513 r1 = MUL_VEC_SS(r1,rE);
514 r2 = ADD_VEC_SS(r2,rD);
515 r3 = MUL_VEC_SS(r3,rC);
516 r4 = ADD_VEC_SS(r4,rF);
517 r5 = MUL_VEC_SS(r5,rE);
518 r6 = ADD_VEC_SS(r6,rD);
519 r7 = MUL_VEC_SS(r7,rC);
520 r8 = ADD_VEC_SS(r8,rF);
521 r9 = MUL_VEC_SS(r9,rE);
522 rA = ADD_VEC_SS(rA,rD);
523 rB = MUL_VEC_SS(rB,rC);
524
525 r0 = MUL_VEC_SS(r0,rC);
526 r1 = ADD_VEC_SS(r1,rD);
527 r2 = MUL_VEC_SS(r2,rE);
528 r3 = ADD_VEC_SS(r3,rF);
529 r4 = MUL_VEC_SS(r4,rC);
530 r5 = ADD_VEC_SS(r5,rD);
531 r6 = MUL_VEC_SS(r6,rE);
532 r7 = ADD_VEC_SS(r7,rF);
533 r8 = MUL_VEC_SS(r8,rC);
534 r9 = ADD_VEC_SS(r9,rD);
535 rA = MUL_VEC_SS(rA,rE);
536 rB = ADD_VEC_SS(rB,rF);
537
538 r0 = ADD_VEC_SS(r0,rF);
539 r1 = MUL_VEC_SS(r1,rE);
540 r2 = ADD_VEC_SS(r2,rD);
541 r3 = MUL_VEC_SS(r3,rC);
542 r4 = ADD_VEC_SS(r4,rF);
543 r5 = MUL_VEC_SS(r5,rE);
544 r6 = ADD_VEC_SS(r6,rD);
545 r7 = MUL_VEC_SS(r7,rC);
546 r8 = ADD_VEC_SS(r8,rF);
547 r9 = MUL_VEC_SS(r9,rE);
548 rA = ADD_VEC_SS(rA,rD);
549 rB = MUL_VEC_SS(rB,rC);
550
551 i++;
552 }
553 c++;
554 }
555
556 /* Use data so that compiler does not eliminate it when using -O2 */
557 r0 = ADD_VEC_SS(r0,r1);
558 r2 = ADD_VEC_SS(r2,r3);
559 r4 = ADD_VEC_SS(r4,r5);
560 r6 = ADD_VEC_SS(r6,r7);
561 r8 = ADD_VEC_SS(r8,r9);
562 rA = ADD_VEC_SS(rA,rB);
563
564 r0 = ADD_VEC_SS(r0,r2);
565 r4 = ADD_VEC_SS(r4,r6);
566 r8 = ADD_VEC_SS(r8,rA);
567
568 r0 = ADD_VEC_SS(r0,r4);
569 r0 = ADD_VEC_SS(r0,r8);
570
571 float out = 0;
572 SP_SCALAR_TYPE temp = r0;
573 out += ((float*)&temp)[0];
574
575 return out;
576}
Here is the caller graph for this function:

◆ test_sp_scalar_VEC_96()

float test_sp_scalar_VEC_96 ( uint64  iterations)

Definition at line 581 of file vec_scalar_verify.c.

581 {
582 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
583
584 /* Generate starting data */
585 r0 = SET_VEC_SS(0.01);
586 r1 = SET_VEC_SS(0.02);
587 r2 = SET_VEC_SS(0.03);
588 r3 = SET_VEC_SS(0.04);
589 r4 = SET_VEC_SS(0.05);
590 r5 = SET_VEC_SS(0.06);
591 r6 = SET_VEC_SS(0.07);
592 r7 = SET_VEC_SS(0.08);
593 r8 = SET_VEC_SS(0.09);
594 r9 = SET_VEC_SS(0.10);
595 rA = SET_VEC_SS(0.11);
596 rB = SET_VEC_SS(0.12);
597 rC = SET_VEC_SS(0.13);
598 rD = SET_VEC_SS(0.14);
599 rE = SET_VEC_SS(0.15);
600 rF = SET_VEC_SS(0.16);
601
602 uint64 c = 0;
603 while (c < iterations){
604 size_t i = 0;
605 while (i < 1000){
606
607 /* The performance critical part */
608 r0 = MUL_VEC_SS(r0,rC);
609 r1 = ADD_VEC_SS(r1,rD);
610 r2 = MUL_VEC_SS(r2,rE);
611 r3 = ADD_VEC_SS(r3,rF);
612 r4 = MUL_VEC_SS(r4,rC);
613 r5 = ADD_VEC_SS(r5,rD);
614 r6 = MUL_VEC_SS(r6,rE);
615 r7 = ADD_VEC_SS(r7,rF);
616 r8 = MUL_VEC_SS(r8,rC);
617 r9 = ADD_VEC_SS(r9,rD);
618 rA = MUL_VEC_SS(rA,rE);
619 rB = ADD_VEC_SS(rB,rF);
620
621 r0 = ADD_VEC_SS(r0,rF);
622 r1 = MUL_VEC_SS(r1,rE);
623 r2 = ADD_VEC_SS(r2,rD);
624 r3 = MUL_VEC_SS(r3,rC);
625 r4 = ADD_VEC_SS(r4,rF);
626 r5 = MUL_VEC_SS(r5,rE);
627 r6 = ADD_VEC_SS(r6,rD);
628 r7 = MUL_VEC_SS(r7,rC);
629 r8 = ADD_VEC_SS(r8,rF);
630 r9 = MUL_VEC_SS(r9,rE);
631 rA = ADD_VEC_SS(rA,rD);
632 rB = MUL_VEC_SS(rB,rC);
633
634 r0 = MUL_VEC_SS(r0,rC);
635 r1 = ADD_VEC_SS(r1,rD);
636 r2 = MUL_VEC_SS(r2,rE);
637 r3 = ADD_VEC_SS(r3,rF);
638 r4 = MUL_VEC_SS(r4,rC);
639 r5 = ADD_VEC_SS(r5,rD);
640 r6 = MUL_VEC_SS(r6,rE);
641 r7 = ADD_VEC_SS(r7,rF);
642 r8 = MUL_VEC_SS(r8,rC);
643 r9 = ADD_VEC_SS(r9,rD);
644 rA = MUL_VEC_SS(rA,rE);
645 rB = ADD_VEC_SS(rB,rF);
646
647 r0 = ADD_VEC_SS(r0,rF);
648 r1 = MUL_VEC_SS(r1,rE);
649 r2 = ADD_VEC_SS(r2,rD);
650 r3 = MUL_VEC_SS(r3,rC);
651 r4 = ADD_VEC_SS(r4,rF);
652 r5 = MUL_VEC_SS(r5,rE);
653 r6 = ADD_VEC_SS(r6,rD);
654 r7 = MUL_VEC_SS(r7,rC);
655 r8 = ADD_VEC_SS(r8,rF);
656 r9 = MUL_VEC_SS(r9,rE);
657 rA = ADD_VEC_SS(rA,rD);
658 rB = MUL_VEC_SS(rB,rC);
659
660 r0 = MUL_VEC_SS(r0,rC);
661 r1 = ADD_VEC_SS(r1,rD);
662 r2 = MUL_VEC_SS(r2,rE);
663 r3 = ADD_VEC_SS(r3,rF);
664 r4 = MUL_VEC_SS(r4,rC);
665 r5 = ADD_VEC_SS(r5,rD);
666 r6 = MUL_VEC_SS(r6,rE);
667 r7 = ADD_VEC_SS(r7,rF);
668 r8 = MUL_VEC_SS(r8,rC);
669 r9 = ADD_VEC_SS(r9,rD);
670 rA = MUL_VEC_SS(rA,rE);
671 rB = ADD_VEC_SS(rB,rF);
672
673 r0 = ADD_VEC_SS(r0,rF);
674 r1 = MUL_VEC_SS(r1,rE);
675 r2 = ADD_VEC_SS(r2,rD);
676 r3 = MUL_VEC_SS(r3,rC);
677 r4 = ADD_VEC_SS(r4,rF);
678 r5 = MUL_VEC_SS(r5,rE);
679 r6 = ADD_VEC_SS(r6,rD);
680 r7 = MUL_VEC_SS(r7,rC);
681 r8 = ADD_VEC_SS(r8,rF);
682 r9 = MUL_VEC_SS(r9,rE);
683 rA = ADD_VEC_SS(rA,rD);
684 rB = MUL_VEC_SS(rB,rC);
685
686 r0 = MUL_VEC_SS(r0,rC);
687 r1 = ADD_VEC_SS(r1,rD);
688 r2 = MUL_VEC_SS(r2,rE);
689 r3 = ADD_VEC_SS(r3,rF);
690 r4 = MUL_VEC_SS(r4,rC);
691 r5 = ADD_VEC_SS(r5,rD);
692 r6 = MUL_VEC_SS(r6,rE);
693 r7 = ADD_VEC_SS(r7,rF);
694 r8 = MUL_VEC_SS(r8,rC);
695 r9 = ADD_VEC_SS(r9,rD);
696 rA = MUL_VEC_SS(rA,rE);
697 rB = ADD_VEC_SS(rB,rF);
698
699 r0 = ADD_VEC_SS(r0,rF);
700 r1 = MUL_VEC_SS(r1,rE);
701 r2 = ADD_VEC_SS(r2,rD);
702 r3 = MUL_VEC_SS(r3,rC);
703 r4 = ADD_VEC_SS(r4,rF);
704 r5 = MUL_VEC_SS(r5,rE);
705 r6 = ADD_VEC_SS(r6,rD);
706 r7 = MUL_VEC_SS(r7,rC);
707 r8 = ADD_VEC_SS(r8,rF);
708 r9 = MUL_VEC_SS(r9,rE);
709 rA = ADD_VEC_SS(rA,rD);
710 rB = MUL_VEC_SS(rB,rC);
711
712 i++;
713 }
714 c++;
715 }
716
717 /* Use data so that compiler does not eliminate it when using -O2 */
718 r0 = ADD_VEC_SS(r0,r1);
719 r2 = ADD_VEC_SS(r2,r3);
720 r4 = ADD_VEC_SS(r4,r5);
721 r6 = ADD_VEC_SS(r6,r7);
722 r8 = ADD_VEC_SS(r8,r9);
723 rA = ADD_VEC_SS(rA,rB);
724
725 r0 = ADD_VEC_SS(r0,r2);
726 r4 = ADD_VEC_SS(r4,r6);
727 r8 = ADD_VEC_SS(r8,rA);
728
729 r0 = ADD_VEC_SS(r0,r4);
730 r0 = ADD_VEC_SS(r0,r8);
731
732 float out = 0;
733 SP_SCALAR_TYPE temp = r0;
734 out += ((float*)&temp)[0];
735
736 return out;
737}
Here is the caller graph for this function:

◆ test_sp_scalar_VEC_FMA_12()

float test_sp_scalar_VEC_FMA_12 ( uint64  iterations)

Definition at line 1361 of file vec_scalar_verify.c.

1361 {
1362 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
1363
1364 /* Generate starting data */
1365 r0 = SET_VEC_SS(0.01);
1366 r1 = SET_VEC_SS(0.02);
1367 r2 = SET_VEC_SS(0.03);
1368 r3 = SET_VEC_SS(0.04);
1369 r4 = SET_VEC_SS(0.05);
1370 r5 = SET_VEC_SS(0.06);
1371 r6 = SET_VEC_SS(0.07);
1372 r7 = SET_VEC_SS(0.08);
1373 r8 = SET_VEC_SS(0.09);
1374 r9 = SET_VEC_SS(0.10);
1375 rA = SET_VEC_SS(0.11);
1376 rB = SET_VEC_SS(0.12);
1377 rC = SET_VEC_SS(0.13);
1378 rD = SET_VEC_SS(0.14);
1379 rE = SET_VEC_SS(0.15);
1380 rF = SET_VEC_SS(0.16);
1381
1382 uint64 c = 0;
1383 while (c < iterations){
1384 size_t i = 0;
1385 while (i < 1000){
1386
1387 /* The performance critical part */
1388 FMA_VEC_SS(r0,r0,r7,r9);
1389 FMA_VEC_SS(r1,r1,r8,rA);
1390 FMA_VEC_SS(r2,r2,r9,rB);
1391 FMA_VEC_SS(r3,r3,rA,rC);
1392 FMA_VEC_SS(r4,r4,rB,rD);
1393 FMA_VEC_SS(r5,r5,rC,rE);
1394
1395 FMA_VEC_SS(r0,r0,rD,rF);
1396 FMA_VEC_SS(r1,r1,rC,rE);
1397 FMA_VEC_SS(r2,r2,rB,rD);
1398 FMA_VEC_SS(r3,r3,rA,rC);
1399 FMA_VEC_SS(r4,r4,r9,rB);
1400 FMA_VEC_SS(r5,r5,r8,rA);
1401
1402 i++;
1403 }
1404 c++;
1405 }
1406
1407 /* Use data so that compiler does not eliminate it when using -O2 */
1408 r0 = ADD_VEC_SS(r0,r1);
1409 r2 = ADD_VEC_SS(r2,r3);
1410 r4 = ADD_VEC_SS(r4,r5);
1411
1412 r0 = ADD_VEC_SS(r0,r6);
1413 r2 = ADD_VEC_SS(r2,r4);
1414
1415 r0 = ADD_VEC_SS(r0,r2);
1416
1417 float out = 0;
1418 SP_SCALAR_TYPE temp = r0;
1419 out += ((float*)&temp)[0];
1420
1421 return out;
1422}
Here is the caller graph for this function:

◆ test_sp_scalar_VEC_FMA_24()

float test_sp_scalar_VEC_FMA_24 ( uint64  iterations)

Definition at line 1427 of file vec_scalar_verify.c.

1427 {
1428 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
1429
1430 /* Generate starting data */
1431 r0 = SET_VEC_SS(0.01);
1432 r1 = SET_VEC_SS(0.02);
1433 r2 = SET_VEC_SS(0.03);
1434 r3 = SET_VEC_SS(0.04);
1435 r4 = SET_VEC_SS(0.05);
1436 r5 = SET_VEC_SS(0.06);
1437 r6 = SET_VEC_SS(0.07);
1438 r7 = SET_VEC_SS(0.08);
1439 r8 = SET_VEC_SS(0.09);
1440 r9 = SET_VEC_SS(0.10);
1441 rA = SET_VEC_SS(0.11);
1442 rB = SET_VEC_SS(0.12);
1443 rC = SET_VEC_SS(0.13);
1444 rD = SET_VEC_SS(0.14);
1445 rE = SET_VEC_SS(0.15);
1446 rF = SET_VEC_SS(0.16);
1447
1448 uint64 c = 0;
1449 while (c < iterations){
1450 size_t i = 0;
1451 while (i < 1000){
1452
1453 /* The performance critical part */
1454 FMA_VEC_SS(r0,r0,r7,r9);
1455 FMA_VEC_SS(r1,r1,r8,rA);
1456 FMA_VEC_SS(r2,r2,r9,rB);
1457 FMA_VEC_SS(r3,r3,rA,rC);
1458 FMA_VEC_SS(r4,r4,rB,rD);
1459 FMA_VEC_SS(r5,r5,rC,rE);
1460
1461 FMA_VEC_SS(r0,r0,rD,rF);
1462 FMA_VEC_SS(r1,r1,rC,rE);
1463 FMA_VEC_SS(r2,r2,rB,rD);
1464 FMA_VEC_SS(r3,r3,rA,rC);
1465 FMA_VEC_SS(r4,r4,r9,rB);
1466 FMA_VEC_SS(r5,r5,r8,rA);
1467
1468 FMA_VEC_SS(r0,r0,r7,r9);
1469 FMA_VEC_SS(r1,r1,r8,rA);
1470 FMA_VEC_SS(r2,r2,r9,rB);
1471 FMA_VEC_SS(r3,r3,rA,rC);
1472 FMA_VEC_SS(r4,r4,rB,rD);
1473 FMA_VEC_SS(r5,r5,rC,rE);
1474
1475 FMA_VEC_SS(r0,r0,rD,rF);
1476 FMA_VEC_SS(r1,r1,rC,rE);
1477 FMA_VEC_SS(r2,r2,rB,rD);
1478 FMA_VEC_SS(r3,r3,rA,rC);
1479 FMA_VEC_SS(r4,r4,r9,rB);
1480 FMA_VEC_SS(r5,r5,r8,rA);
1481
1482 i++;
1483 }
1484 c++;
1485 }
1486
1487 /* Use data so that compiler does not eliminate it when using -O2 */
1488 r0 = ADD_VEC_SS(r0,r1);
1489 r2 = ADD_VEC_SS(r2,r3);
1490 r4 = ADD_VEC_SS(r4,r5);
1491
1492 r0 = ADD_VEC_SS(r0,r6);
1493 r2 = ADD_VEC_SS(r2,r4);
1494
1495 r0 = ADD_VEC_SS(r0,r2);
1496
1497 float out = 0;
1498 SP_SCALAR_TYPE temp = r0;
1499 out += ((float*)&temp)[0];
1500
1501 return out;
1502}
Here is the caller graph for this function:

◆ test_sp_scalar_VEC_FMA_48()

float test_sp_scalar_VEC_FMA_48 ( uint64  iterations)

Definition at line 1507 of file vec_scalar_verify.c.

1507 {
1508 register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
1509
1510 /* Generate starting data */
1511 r0 = SET_VEC_SS(0.01);
1512 r1 = SET_VEC_SS(0.02);
1513 r2 = SET_VEC_SS(0.03);
1514 r3 = SET_VEC_SS(0.04);
1515 r4 = SET_VEC_SS(0.05);
1516 r5 = SET_VEC_SS(0.06);
1517 r6 = SET_VEC_SS(0.07);
1518 r7 = SET_VEC_SS(0.08);
1519 r8 = SET_VEC_SS(0.09);
1520 r9 = SET_VEC_SS(0.10);
1521 rA = SET_VEC_SS(0.11);
1522 rB = SET_VEC_SS(0.12);
1523 rC = SET_VEC_SS(0.13);
1524 rD = SET_VEC_SS(0.14);
1525 rE = SET_VEC_SS(0.15);
1526 rF = SET_VEC_SS(0.16);
1527
1528 uint64 c = 0;
1529 while (c < iterations){
1530 size_t i = 0;
1531 while (i < 1000){
1532
1533 /* The performance critical part */
1534 FMA_VEC_SS(r0,r0,r7,r9);
1535 FMA_VEC_SS(r1,r1,r8,rA);
1536 FMA_VEC_SS(r2,r2,r9,rB);
1537 FMA_VEC_SS(r3,r3,rA,rC);
1538 FMA_VEC_SS(r4,r4,rB,rD);
1539 FMA_VEC_SS(r5,r5,rC,rE);
1540
1541 FMA_VEC_SS(r0,r0,rD,rF);
1542 FMA_VEC_SS(r1,r1,rC,rE);
1543 FMA_VEC_SS(r2,r2,rB,rD);
1544 FMA_VEC_SS(r3,r3,rA,rC);
1545 FMA_VEC_SS(r4,r4,r9,rB);
1546 FMA_VEC_SS(r5,r5,r8,rA);
1547
1548 FMA_VEC_SS(r0,r0,r7,r9);
1549 FMA_VEC_SS(r1,r1,r8,rA);
1550 FMA_VEC_SS(r2,r2,r9,rB);
1551 FMA_VEC_SS(r3,r3,rA,rC);
1552 FMA_VEC_SS(r4,r4,rB,rD);
1553 FMA_VEC_SS(r5,r5,rC,rE);
1554
1555 FMA_VEC_SS(r0,r0,rD,rF);
1556 FMA_VEC_SS(r1,r1,rC,rE);
1557 FMA_VEC_SS(r2,r2,rB,rD);
1558 FMA_VEC_SS(r3,r3,rA,rC);
1559 FMA_VEC_SS(r4,r4,r9,rB);
1560 FMA_VEC_SS(r5,r5,r8,rA);
1561
1562 FMA_VEC_SS(r0,r0,r7,r9);
1563 FMA_VEC_SS(r1,r1,r8,rA);
1564 FMA_VEC_SS(r2,r2,r9,rB);
1565 FMA_VEC_SS(r3,r3,rA,rC);
1566 FMA_VEC_SS(r4,r4,rB,rD);
1567 FMA_VEC_SS(r5,r5,rC,rE);
1568
1569 FMA_VEC_SS(r0,r0,rD,rF);
1570 FMA_VEC_SS(r1,r1,rC,rE);
1571 FMA_VEC_SS(r2,r2,rB,rD);
1572 FMA_VEC_SS(r3,r3,rA,rC);
1573 FMA_VEC_SS(r4,r4,r9,rB);
1574 FMA_VEC_SS(r5,r5,r8,rA);
1575
1576 FMA_VEC_SS(r0,r0,r7,r9);
1577 FMA_VEC_SS(r1,r1,r8,rA);
1578 FMA_VEC_SS(r2,r2,r9,rB);
1579 FMA_VEC_SS(r3,r3,rA,rC);
1580 FMA_VEC_SS(r4,r4,rB,rD);
1581 FMA_VEC_SS(r5,r5,rC,rE);
1582
1583 FMA_VEC_SS(r0,r0,rD,rF);
1584 FMA_VEC_SS(r1,r1,rC,rE);
1585 FMA_VEC_SS(r2,r2,rB,rD);
1586 FMA_VEC_SS(r3,r3,rA,rC);
1587 FMA_VEC_SS(r4,r4,r9,rB);
1588 FMA_VEC_SS(r5,r5,r8,rA);
1589
1590 i++;
1591 }
1592 c++;
1593 }
1594
1595 /* Use data so that compiler does not eliminate it when using -O2 */
1596 r0 = ADD_VEC_SS(r0,r1);
1597 r2 = ADD_VEC_SS(r2,r3);
1598 r4 = ADD_VEC_SS(r4,r5);
1599
1600 r0 = ADD_VEC_SS(r0,r6);
1601 r2 = ADD_VEC_SS(r2,r4);
1602
1603 r0 = ADD_VEC_SS(r0,r2);
1604
1605 float out = 0;
1606 SP_SCALAR_TYPE temp = r0;
1607 out += ((float*)&temp)[0];
1608
1609 return out;
1610}
Here is the caller graph for this function: