
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/malloc.h"

/* Short names for the unsigned integer types used throughout this file. */
typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  unsigned long int       UWord;
typedef  unsigned long long int  ULong;

/* True iff the low five bits of the address _ptr are zero, i.e. the
   address is a multiple of 32. */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

/* One YMM-register-sized value, viewable either as 32 bytes (u8) or as
   an array of eight UInt (u32). */
typedef  union { UChar u8[32];  UInt u32[8];  }  YMM;

/* The data a generated test operates on: four YMM-sized values followed
   by one ULong.  GEN_test_RandM addresses these fields by fixed byte
   offsets (0, 32, 64, 96 and 128) from the start of the struct, so the
   field order and sizes must not change without updating the asm. */
typedef  struct {  YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; }  Block;

/* Print the 32 bytes of *vec to stdout as lowercase hex, starting with
   u8[31] and ending with u8[0].  A '.' is emitted after bytes 24, 16
   and 8, splitting the output into four groups of eight bytes.  No
   newline is printed.  vec must be 32-byte aligned (asserted). */
void showYMM ( YMM* vec )
{
   const UChar* bytes;
   int          idx = 32;

   assert(IS_32_ALIGNED(vec));
   bytes = vec->u8;

   while (idx-- > 0) {
      printf("%02x", (UInt)bytes[idx]);
      /* Group separator on every 8-byte boundary except the very end. */
      if (idx != 0 && (idx % 8) == 0) {
         printf(".");
      }
   }
}

/* Dump a whole Block to stdout: first msg on its own line (indented by
   two spaces), then a1..a4 one per line via showYMM (indented by four
   spaces), then u64 as 16 hex digits. */
void showBlock ( char* msg, Block* block )
{
   YMM* rows[4];
   int  r;

   rows[0] = &block->a1;
   rows[1] = &block->a2;
   rows[2] = &block->a3;
   rows[3] = &block->a4;

   printf("  %s\n", msg);
   for (r = 0; r < 4; r++) {
      printf("    ");
      showYMM(rows[r]);
      printf("\n");
   }
   printf("    %016llx\n", block->u64);
}

UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

/* Overwrite every byte of *b, in ascending address order, with
   successive values from randUChar(). */
void randBlock ( Block* b )
{
   UChar* cur = (UChar*)b;
   UChar* end = cur + sizeof(Block);

   while (cur < end) {
      *cur++ = randUChar();
   }
}


/* Generate a function test_NAME, that tests the given insn, in both
   its mem and reg forms.  The reg form of the insn may mention, as
   operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14.  The mem form of
   the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
   and %r14.  It's OK for the insn to clobber ymm0, as this is needed
   for testing PCMPxSTRx, and ymm6, as this is needed for testing
   MOVMASK variants. */

#define GEN_test_RandM(_name, _reg_form, _mem_form)   \
    \
    __attribute__ ((noinline)) static void test_##_name ( void )   \
    { \
       Block* b = memalign32(sizeof(Block)); \
       randBlock(b); \
       printf("%s(reg)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "vmovdqa   0(%0),%%ymm7"  "\n\t" \
          "vmovdqa  32(%0),%%ymm8"  "\n\t" \
          "vmovdqa  64(%0),%%ymm6"  "\n\t" \
          "vmovdqa  96(%0),%%ymm9"  "\n\t" \
          "movq    128(%0),%%r14"   "\n\t" \
          _reg_form   "\n\t" \
          "vmovdqa %%ymm7,  0(%0)"  "\n\t" \
          "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
          "vmovdqa %%ymm6, 64(%0)"  "\n\t" \
          "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
          "movq    %%r14, 128(%0)"  "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
       ); \
       showBlock("after", b); \
       randBlock(b); \
       printf("%s(mem)\n", #_name); \
       showBlock("before", b); \
       __asm__ __volatile__( \
          "leaq      0(%0),%%rax"  "\n\t" \
          "vmovdqa  32(%0),%%ymm8"  "\n\t" \
          "vmovdqa  64(%0),%%ymm7"  "\n\t" \
          "vmovdqa  96(%0),%%ymm9"  "\n\t" \
          "movq    128(%0),%%r14"   "\n\t" \
          _mem_form   "\n\t" \
          "vmovdqa %%ymm8, 32(%0)"  "\n\t" \
          "vmovdqa %%ymm7, 64(%0)"  "\n\t" \
          "vmovdqa %%ymm9, 96(%0)"  "\n\t" \
          "movq    %%r14, 128(%0)"  "\n\t" \
          : /*OUT*/  \
          : /*IN*/"r"(b) \
          : /*TRASH*/"xmm6", \
                     "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
       ); \
       showBlock("after", b); \
       printf("\n"); \
       free(b); \
    }

/* Single-form conveniences built on GEN_test_RandM: the form that is
   not wanted is passed as an empty string.  The generated function
   still runs and prints both halves; the unused half executes only the
   surrounding load/store sequence with no test insn in between. */
#define GEN_test_Ronly(_testname, _insn)  GEN_test_RandM(_testname, _insn, "")
#define GEN_test_Monly(_testname, _insn)  GEN_test_RandM(_testname, "", _insn)

/* VCVTPH2PS, 128- and 256-bit destination.  The source is %xmm6 in the
   reg form and (%rax) in the mem form; the destination is %xmm8 or
   %ymm8.  NOTE(review): per the Intel SDM this converts packed
   half-precision values to single precision. */
GEN_test_RandM(VCVTPH2PS_128,
               "vcvtph2ps %%xmm6, %%xmm8",
               "vcvtph2ps (%%rax), %%xmm8");
GEN_test_RandM(VCVTPH2PS_256,
               "vcvtph2ps %%xmm6, %%ymm8",
               "vcvtph2ps (%%rax), %%ymm8");

/* VCVTPS2PH, 128- and 256-bit source, once for each imm8 value 0..4.
   The source is %xmm8 or %ymm8; the destination is %xmm6 in the reg
   form and (%rax) in the mem form.  The _N suffix of each test name is
   the imm8 used.
   NOTE(review): per the Intel SDM, imm8[1:0] selects the rounding mode
   (0 = nearest-even, 1 = down, 2 = up, 3 = truncate) and imm8[2] = 1
   means "use MXCSR.RC instead" -- confirm against the SDM. */

/* imm8 = 0 */
GEN_test_RandM(VCVTPS2PH_128_0,
               "vcvtps2ph $0, %%xmm8, %%xmm6",
               "vcvtps2ph $0, %%xmm8, (%%rax)");
GEN_test_RandM(VCVTPS2PH_256_0,
               "vcvtps2ph $0, %%ymm8, %%xmm6",
               "vcvtps2ph $0, %%ymm8, (%%rax)");

/* imm8 = 1 */
GEN_test_RandM(VCVTPS2PH_128_1,
               "vcvtps2ph $1, %%xmm8, %%xmm6",
               "vcvtps2ph $1, %%xmm8, (%%rax)");
GEN_test_RandM(VCVTPS2PH_256_1,
               "vcvtps2ph $1, %%ymm8, %%xmm6",
               "vcvtps2ph $1, %%ymm8, (%%rax)");

/* imm8 = 2 */
GEN_test_RandM(VCVTPS2PH_128_2,
               "vcvtps2ph $2, %%xmm8, %%xmm6",
               "vcvtps2ph $2, %%xmm8, (%%rax)");
GEN_test_RandM(VCVTPS2PH_256_2,
               "vcvtps2ph $2, %%ymm8, %%xmm6",
               "vcvtps2ph $2, %%ymm8, (%%rax)");

/* imm8 = 3 */
GEN_test_RandM(VCVTPS2PH_128_3,
               "vcvtps2ph $3, %%xmm8, %%xmm6",
               "vcvtps2ph $3, %%xmm8, (%%rax)");
GEN_test_RandM(VCVTPS2PH_256_3,
               "vcvtps2ph $3, %%ymm8, %%xmm6",
               "vcvtps2ph $3, %%ymm8, (%%rax)");

/* imm8 = 4 */
GEN_test_RandM(VCVTPS2PH_128_4,
               "vcvtps2ph $4, %%xmm8, %%xmm6",
               "vcvtps2ph $4, %%xmm8, (%%rax)");
GEN_test_RandM(VCVTPS2PH_256_4,
               "vcvtps2ph $4, %%ymm8, %%xmm6",
               "vcvtps2ph $4, %%ymm8, (%%rax)");

/* Duplicated from the comment above, for convenient reference:
   Allowed operands in test insns:
     Reg form:  %ymm6,  %ymm7, %ymm8, %ymm9 and %r14.
     Mem form:  (%rax), %ymm7, %ymm8, %ymm9 and %r14.
   Imm8 etc fields are also allowed, where they make sense.
   Both forms may use ymm0 as scratch.  Mem form may also use
   ymm6 as scratch.
*/

// Number of repetitions used by DO_D
#define N_DEFAULT_ITERS 3

// Call test_<_testfn>() repeatedly: a counter starts at 0 and the test
// runs for as long as the counter is below _iters.  _iters is
// re-evaluated before every call.
#define DO_N(_iters, _testfn) \
   do { \
      int i = 0; \
      while (i < (_iters)) { \
         test_##_testfn(); \
         i++; \
      } \
   } while (0)

// Same as DO_N, with N_DEFAULT_ITERS as the repetition count
#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)


int main ( void )
{
   DO_D( VCVTPH2PS_128 );
   DO_D( VCVTPH2PS_256 );

   DO_D( VCVTPS2PH_128_0 );
   DO_D( VCVTPS2PH_256_0 );

   DO_D( VCVTPS2PH_128_1 );
   DO_D( VCVTPS2PH_256_1 );

   DO_D( VCVTPS2PH_128_2 );
   DO_D( VCVTPS2PH_256_2 );

   DO_D( VCVTPS2PH_128_3 );
   DO_D( VCVTPS2PH_256_3 );

   DO_D( VCVTPS2PH_128_4 );
   DO_D( VCVTPS2PH_256_4 );

   return 0;
}
