/*
 * Benchmark #7 -- Bit Twiddle
 *
 * The Problem:
 *
 *   Given {A(i)}, a binary stream of length L, let {B(i)} and
 *   {C(i)} be binary streams formed as follows:
 *     {B(i)} = A(0),A(1),A(2),A(3),A(4),A(11),A(12),...etc.
 *     {C(i)} = A(5),A(6),A(7),A(8),A(9),A(16),A(17),...etc.
 *
 *   i.e., to form the {B(i)} stream, take 5 from {A(i)}, drop 6,
 *   take 5, drop 6, etc.,
 *   and to form the {C(i)} stream, drop 5 from {A(i)}, take 5,
 *   drop 6, take 5, drop 6, and so on (dropping 5 only on the
 *   first instance).
 *
 *   Calculate the stream {D(i)} defined as
 *
 *     D(i) = (C(i) ^ B(i+1)) + (~C(i) ^ B(i-1))
 *
 *   where
 *     ~X  = the complement of X
 *     XOR = exclusive "or" function
 *     ^   = "and" function
 *     +   = can be integer add, exclusive "or", or inclusive "or".
 *           They are all equivalent here.
 *
 *   Then calculate {E(i)} where
 *
 *     E(i) = C(i) XOR D(i) XOR C(i+37) XOR D(i+37) XOR
 *            C(i+100) XOR D(i+100).
 *
 *   Now locate all sequences of zeros of length greater than 100 in bit
 *   stream {E(i)}, and output for each sequence of zeros, the
 *   starting position of the sequence in the E stream and the
 *   length of the sequence.
 *
 *   Parameter: numbufs = 100
 *
 * Main Program for Benchmark #7
 *
 *   Call time parameters:
 *
 *     numbufs = Number of buffers of the A stream to process
 *               Default = 100 buffers =  72089600000 bits
 *               Maximum = 200 buffers = 144179200000 bits
 */

#include "bench7.h"

/* Default and upper limit for the numbufs command-line argument. */
#define DEFBUFS 100
#define MAXBUFS 200
/* Capacity of the per-PE arrays (locnn, displ, intlocnn) below. */
#define MAXPES 64

#ifdef USE_SHMEM
int _num_pes(void);
int _my_pe(void);
/* Work array handed to shmem_collect in main.
 * NOTE(review): sized with the REDUCE constant although it is passed to
 * shmem_collect -- confirm this is at least the collect sync size. */
long pSync[_SHMEM_REDUCE_SYNC_SIZE];
#endif

// declare these outside main so shmem can find them
// make them all 64 bits so shmem_collect works on T3E and on Sierra
/* nn starts at -1, is passed by address to p7, and is incremented once after
 * the buffer loop; from then on main treats it as this PE's result count.
 * (Presumably p7 leaves it at the index of the last stored answer -- confirm.) */
int64 nn = -1;
/* Per-PE result counts, filled by MPI_Gather / shmem_collect in main. */
int64 locnn[MAXPES];
/* Answer arrays handed to p7, c7 and r7 - MAXANS defined in bench7.h */
int64 start[MAXANS];
int64 length[MAXANS];
/* Not referenced in the visible part of main; presumably used by the
 * result-gather code that is missing from this view -- TODO confirm. */
int64 allstart[2000];
int64 alllength[2000];

/*
 * Driver for Benchmark #7.
 *
 * Reads numbufs from argv[1] (falling back to DEFBUFS, capped at MAXBUFS),
 * splits the buffer range [0, numbufs) across the npes PEs, and for each
 * buffer owned by this PE calls s7 (generate) and p7 (process) while
 * accumulating CPU and wall-clock time for each phase.  When npes > 1 the
 * per-PE result counts are gathered; PE 0 then merges intervals at PE
 * boundaries and calls c7 (check) and r7 (report).
 *
 * NOTE(review): main is declared without a return type and ends with a bare
 * `return;`.  C99 and later require `int main` returning a value.
 */
main( int argc, char**argv )
{
  /* One buffer of the A bitstream; filled by s7 and consumed by p7. */
  static uint64 A[BUFSIZE];

  /* Arrays to hold answers - MAXANS defined in bench7.h */
  // int64 start[MAXANS];
  // int length[MAXANS];
  int OK[MAXANS];   /* Error flag array */
  int ier[2];       /* passed to c7 and r7 together with OK */
  int numbufs = 0, bufnum, seed; //, nn = -1;
  int ntasks, extra, myfirst, mylast;
  /* NOTE(review): npes and mype are assigned only inside the USE_SHMEM and
   * USE_MPI sections below; a build with neither macro defined would read
   * them uninitialized -- confirm one of the two is always defined. */
  int npes, mype;
  int i, j;

  /* Variables for timing purposes */
  double cset, wset, crun, wrun, x0, y0;
  double cputime(), wall();

  int PE0 = 0;            /* root rank for MPI_Gather */
  int displ[MAXPES];      /* running offsets of each PE's results; set on PE 0 only */
  int globnn;             /* total result count over all PEs; set on PE 0 only */
  int intnn, intlocnn[MAXPES];  /* int-sized copies of nn / locnn for the MPI path */

#ifdef USE_SHMEM
  /* This should come before any other executed code */
# ifndef CRAY
  shmem_init();
# endif
  npes = _num_pes();
  mype = _my_pe();
#endif

#ifdef USE_MPI
  /* This should come before any other executed code */
  MPI_Init(&argc, &argv); /* this assures each PE has access to args */
  MPI_Comm_rank(MPI_COMM_WORLD, &mype);
  MPI_Comm_size(MPI_COMM_WORLD, &npes);
#endif

  /* Get input parameter - number of buffers of A-stream to do */
  if (argc > 1)
    numbufs = atoi(argv[1]);
  /* Missing, zero or negative argument falls back to the default;
     oversized values are clamped to MAXBUFS. */
  if ( numbufs <= 0 ) numbufs = DEFBUFS;
  if ( numbufs > MAXBUFS ) numbufs = MAXBUFS;

  /* Every PE must own at least one buffer; otherwise all PEs exit and
     only PE 0 prints the message. */
  if ( npes > numbufs ) {
    if (mype == 0)
      printf( "Sorry, you can't run with npes=%d > numbufs=%d.\n", npes, numbufs );
#ifdef USE_MPI
    MPI_Finalize();
#endif
    exit(1);
  }

  // Distribute the numbufs buffers evenly among the npes PEs
  /* Each PE gets ntasks buffers; the first `extra` PEs get one more.
     min is not defined in this file -- presumably a macro from bench7.h. */
  ntasks = numbufs / npes;
  extra = numbufs - ntasks * npes;
  myfirst = mype * ntasks + min ( mype, extra );
  mylast = (mype+1) * ntasks - 1 + min ( mype+1, extra );

  /* SEED for the random number generator in S7 */
  seed = 99907;

  /* Initialize timing variables */
  /* cset/wset accumulate CPU/wall time spent in s7, crun/wrun in p7. */
  cset = 0.0; wset = 0.0; crun = 0.0; wrun = 0.0;

  /* Generate and process one buffer at a time - avoid I/O */
  for ( bufnum = myfirst; bufnum <= mylast; bufnum++ ) {

    /* Generate one buffer of bitstream A */
    x0 = cputime();
    y0 = wall();
#ifdef Debug
    printf ("call S7 %d %d\n", mype, bufnum);
#endif
    s7 ( A, bufnum, seed );
#ifdef Debug
    printf ("return S7 %d %d\n", mype, bufnum);
#endif
    x0 = cputime() - x0;
    y0 = wall() - y0;
    cset += x0;
    wset += y0;

    /* Process a buffer of A */
    x0 = cputime();
    y0 = wall();
#ifdef Debug
    /* NOTE(review): nn is declared int64 but printed with %d here and in the
       matching "return P7" line -- confirm the width of int64 in bench7.h. */
    printf ("call P7 %d %d %d\n", mype, bufnum, nn);
#endif
    p7 ( A, bufnum, mype, npes, myfirst, mylast, start, length, &nn );
#ifdef Debug
    printf ("return P7 %d %d %d\n", mype, bufnum, nn);
#endif
    x0 = cputime() - x0;
    y0 = wall() - y0;
    crun += x0;
    wrun += y0;

  } /* end loop on bufnum */

  /* nn started at -1; after this increment it is used as a count below. */
  ++ nn;

  if (npes > 1) {

    // Collect all results to PE0
#ifdef USE_MPI
    // First get counts NN from each PE into array LOCNN on PE0
    MPI_Gather ( &nn, 1, MPI_INT64, locnn, 1, MPI_INT64, PE0, MPI_COMM_WORLD );
#endif

#ifdef USE_SHMEM
    /* Initialise the sync array and synchronise before the collective. */
    for ( i=0; i<_SHMEM_REDUCE_SYNC_SIZE; ++i )
      pSync[i] = _SHMEM_SYNC_VALUE;
    shmem_barrier_all();
    // First get counts NN from each PE into array LOCNN on all PEs
    shmem_collect ( locnn, &nn, 1, 0,0,npes, pSync );
#endif

    // Set up array of offsets
    // used for variable gather in MPI, for checking overlap MPI or Shmem
    /* displ[i] is the sum of locnn[0..i-1]; globnn is the sum over all PEs. */
    if (mype == 0) {
      displ[0] = 0;
      for (i = 1; i < npes; i++)
        displ[i] = displ[i-1] + locnn[i-1];
      globnn = displ[npes-1] + locnn[npes-1];
    }

    // Now accumulate results in start and length on PE0
#ifdef USE_MPI
    // reduce counts from int64 to int
    intnn = nn;
    if (mype == 0) // only PE0 has locnn
      for (i=0; i 1
      /* NOTE(review): the text is truncated at this point.  Everything between
         the loop condition above and the next comment is missing from this
         view: the rest of this loop, the code that gathers start/length onto
         PE 0, the #endif matching USE_MPI, and the closing brace of
         `if (npes > 1) {`.  The stray `1` is probably the tail of a lost
         `// end if npes > 1` comment.  Restore this section from the
         upstream file before building. */

  // Now results are in order on PE0, which checks and prints
  if (mype == 0) {

    if (npes > 1) {
      // FIRST check for overlap at boundary between PEs
      // If found, combine overlapping intervals & slide arrays down 1 place
      // Must loop backwards to keep boundaries in right place
      /* NOTE(review): with displ built as prefix sums above, displ[i] and
         1+displ[i] are both entries of PE i rather than the last entry of
         PE i-1 and the first of PE i -- confirm against the layout produced
         by the missing gather section. */
      for ( i = npes-1; i > 0; i-- ) {
        if ( start[1+displ[i]] <= (start[displ[i]] + length[displ[i]]) ) {
          /* Stretch the earlier interval so it ends where the later one ends. */
          length[displ[i]] = start[1+displ[i]] + length[1+displ[i]] - start[displ[i]];
          /* NOTE(review): the first iteration (j = displ[i]+1) writes
             start[displ[i]] and length[displ[i]], overwriting the merged
             length stored just above -- confirm whether the shift should
             begin at displ[i]+2. */
          for ( j = displ[i]+1; j < globnn; j++ ) {
            start[j-1] = start[j];
            length[j-1] = length[j];
          }
          globnn--;
        }
      }
      nn = globnn;
    } // npes > 1

#ifdef Debug
    printf ( "call C7\n" );
#endif
    // Check the results
    c7 ( numbufs, start, length, OK, nn, ier );
#ifdef Debug
    printf ( "return C7\n" );
#endif

#ifdef Debug
    printf ( "call R7\n" );
#endif
    // Output results
    r7 ( numbufs, start, length, OK, nn, ier, npes, cset, wset, crun, wrun );
#ifdef Debug
    printf ( "return R7\n" );
#endif
  } // end if mype == 0 to check & print results

#ifdef USE_MPI
  MPI_Finalize();
#endif
  return;
}