/************************************************************************** * * Subroutine S11 builds the "random" matrices A and B. * * Parameters: * * Provided by calling routine: * N = The size of the data to be multiplied * Q = The size of the grid of PEs, Q x Q * M = The size of the sub-matrices on each PE * NUMSIZE = The number of words used to store an MP integer * (passed in so S11 can declare arrays with exact dimensions) * MODBITS = The number of bits in MODULUS * * Returned by this routine: * A = Matrix of multiple precision numbers * B = Matrix of multiple precision numbers * *************************************************************************** * * Modified for non-shared-memory systems * Each PE fills its own resident sub-matrices of A, B * They are of size M x M, where M = Ceiling(N/Q) * The effective size of the matrices holding the data is M * Q * **************************************************************************/ /* CVS info */ /* \$Date: 2005/09/19 18:48:18 \$ */ /* \$Revision: 1.3 \$ */ /* \$RCSfile: s11.c,v \$ */ /* \$Name: rel_5 \$ */ #include "bench11.h" #ifdef CRAY # include #else # include #endif static char cvs_info[] = "BMARKGRP \$Date: 2005/09/19 18:48:18 \$ \$Revision: 1.3 \$ \$RCSfile: s11.c,v \$ \$Name: rel_5 \$"; /* * Fortran can define A(NUMSIZE,M,M) with variable dimensions. * this kludge in C lets us use same notation as if we declared a[m][m][numsize] * where m and numsize are arguments of this subr call * The 3rd dimension is for the MP integers which are vectors of 64-bit ints */ #define a(I,J,K) a[ (K) + numsize * ( (J) + m * (I) ) ] #define b(I,J,K) b[ (K) + numsize * ( (J) + m * (I) ) ] void s11 ( mplong a[], mplong b[], int n, int q, int m, int numsize, int modbits ) { int abcnumsize; int fullwords, extrabits; int skipbits, bitctr, wordctr; mplong modsize, asize, bsize, extrabitmask; /* Array to hold enough bits for one row (N long) of A and B */ unsigned mplong * tempbits; unsigned mplong temp; /* Other variables */ int irow; mplong myfirst, mylast, ranwds; int i, j, k; int perow, pecol, q0, m0, mi, mj; int one = 1, seed = -99911; /* these are set in M11 */ extern int mype, npes, verbose; abcnumsize = numsize - MPHL; if (verbose) { printf( "CALL S11(%d, modulus, -, -) \n", n ); printf( " (Set up the matrices) \n" ); } modsize = numsize - MPHL; /* Set up counters and mask needed to fill A and B */ fullwords = modbits / BITSPERWORD; extrabits = modbits % BITSPERWORD; if ( extrabits > 0 ) #if (BITSPERWORD == 64) extrabitmask = ((unsigned mplong)(-1L)) >> (BITSPERWORD - extrabits); #else /* BPW == 32 */ extrabitmask = BitMask32 >> (BITSPERWORD - extrabits); #endif if (verbose) { printf( " Modulus has %d bits \n", modbits ); printf( " Fill the matrices \n" ); } /************************************************************************** * * Use the random number generator to fill the matrices A and B. * * Place the random bits into the multiple precision numbers which * make up A and B. * * Each PE fills the M x M submatrix of A and of B that resides on that PE. * For each row in its block, the PE generates all the random bits * for that row, and then fills only the appropriate columns of A and B. * *************************************************************************** * * If M * Q > N, be careful to random-fill only the data cells. * Some PEs have some zero rows or cols or both, some may be all zero. * ROW, COL are zero-based indices of this PE in the Q x Q array of PEs. * **************************************************************************/ perow = mype / q; pecol = mype % q; q0 = n / m; m0 = n % m; /* MI, MJ are loop bounds for I and J They may be < M for PE on right or bottom edge */ mi = m; mj = m; /* Initialize the structures for the multi-precision integers Just the local submatrices */ for ( i = 0; i < m; i++ ) { for ( j = 0; j < m; j++ ) { MPINIT ( &a(i,j,0), abcnumsize, modsize ); MPINIT ( &b(i,j,0), abcnumsize, modsize ); } } /* If say N=225 inside 256-size array, zeroize edges */ if ( perow == q0 ) { for ( i = m0; i < m; i++ ) { for ( j = 0; j < m; j++ ) { MPSETSIZE ( &a(i,j,0), 0 ); MPSETSIZE ( &b(i,j,0), 0 ); a(i,j,MPDATA) = 0; b(i,j,MPDATA) = 0; } } mi = m0; } if ( pecol == q0 ) { for ( i = 0; i < m; i++ ) { for ( j = m0; j < m; j++ ) { MPSETSIZE ( &a(i,j,0), 0 ); MPSETSIZE ( &b(i,j,0), 0 ); a(i,j,MPDATA) = 0; b(i,j,MPDATA) = 0; } } mj = m0; } if ( (perow > q0) || (pecol > q0) ) { for ( i = 0; i < m; i++ ) { for ( j = 0; j < m; j++ ) { MPSETSIZE ( &a(i,j,0), 0 ); MPSETSIZE ( &b(i,j,0), 0 ); a(i,j,MPDATA) = 0; b(i,j,MPDATA) = 0; } } } /* RANWDS 64-bit random binary words will fill a row of A and B */ ranwds = ( (2*n*modbits) + 63 ) / 64; tempbits = (unsigned mplong *) malloc( 8 * ranwds + 16 ); for ( i = 0; i < mi; i++ ) { /* Generate random bits for this (global) row # of A and B in the array tempbits */ irow = 1 + i + m * perow; /* add 1 to agree with Fortran subr */ get_my_bdata64( irow, 1, 1, ranwds, 1, seed, tempbits, &myfirst, &mylast); /* skip bits in TEMPBITS until the ones that go in my subblock */ skipbits = (2 * modbits) * (pecol * m); /* These counters keep track of position in tempbits */ wordctr = skipbits / 64; bitctr = skipbits % 64; for ( j = 0; j < mj; j++ ) { /* Fill the low order words of A(I,J) */ for ( k = MPDATA; k < MPDATA + fullwords; k++ ) { #if (BITSPERWORD==64) /* need this test in case << 64 is a no-op (ex: Alpha) */ if ( bitctr == 0 ) temp = tempbits[wordctr]; else temp = ( ( tempbits[wordctr] >> bitctr ) | ( tempbits[wordctr+1] << 64-bitctr ) ); a(i,j,k) = temp; wordctr++; #else if ( bitctr == 0 ) temp = tempbits[wordctr]; else temp = ( ( tempbits[wordctr] >> bitctr ) | ( tempbits[wordctr+1] << 64-bitctr ) ); a(i,j,k) = temp & BitMask; bitctr += BITSPERWORD; if ( bitctr >= 64 ) { wordctr++; bitctr -= 64; } #endif } /* Partially fill the last word of A(I,J) with EXTRABITS bits */ if (extrabits) { k = MPDATA + fullwords; if ( bitctr == 0 ) temp = tempbits[wordctr]; else temp = ( ( tempbits[wordctr] >> bitctr ) | ( tempbits[wordctr+1] << 64-bitctr ) ); a(i,j,k) = temp & extrabitmask; bitctr += extrabits; if ( bitctr >= 64 ) { wordctr++; bitctr -= 64; } } /* Fill the low order words of B(I,J) */ for ( k = MPDATA; k < MPDATA + fullwords; k++ ) { #if (BITSPERWORD==64) if ( bitctr == 0 ) temp = tempbits[wordctr]; else temp = ( ( tempbits[wordctr] >> bitctr ) | ( tempbits[wordctr+1] << 64-bitctr ) ); b(i,j,k) = temp; wordctr++; #else if ( bitctr == 0 ) temp = tempbits[wordctr]; else temp = ( ( tempbits[wordctr] >> bitctr ) | ( tempbits[wordctr+1] << 64-bitctr ) ); b(i,j,k) = temp & BitMask; bitctr += BITSPERWORD; if ( bitctr >= 64 ) { wordctr++; bitctr -= 64; } #endif } /* Partially fill the last word of B(I,J) with EXTRABITS bits */ if (extrabits) { k = MPDATA + fullwords; if ( bitctr == 0 ) temp = tempbits[wordctr]; else temp = ( ( tempbits[wordctr] >> bitctr ) | ( tempbits[wordctr+1] << 64-bitctr ) ); b(i,j,k) = temp & extrabitmask; bitctr += extrabits; if ( bitctr >= 64 ) { wordctr++; bitctr -= 64; } } } /* loop on j */ } /* loop on i */ free ( tempbits ); /* * Go through the entries of A and B. * If the top order word(s) = zero, reset the usage counter. * This normalizes the form of the multiple precision number. */ for ( i = 0; i < mi; i++ ) for ( j = 0; j < mj; j++ ) { /* Loop bounds mi and mj skip words in border of zeros */ asize = MPGETSIZE ( &a(i,j,0) ); if (asize) { /* work backwards from highest-order limb */ for ( k = numsize - 1; k >= MPDATA; k-- ) { if ( a(i,j,k) == 0 ) asize--; else break; } MPSETSIZE ( &a(i,j,0), asize ); } bsize = MPGETSIZE ( &b(i,j,0) ); if (bsize) { for ( k = numsize - 1; k >= MPDATA; k-- ) { if ( b(i,j,k) == 0 ) bsize--; else break; } MPSETSIZE ( &b(i,j,0), bsize ); } } /* end loop on i, j */ /* if very verbose, test for consistency by printing the filled matrices */ if ( verbose > 1 ) { printf( "S11 Consistency Check - Matrices A and B: \n" ); for ( k = 0; k < npes; k++ ) { if ( mype == k ) { printf("A on PE %d \n", k); for ( i = 0; i < m; i++ ) { printf( "Row %d \n", i ); for ( j = 0; j < m; j++ ) { mpwrite( &a(i,j,0) ); printf ( "\n" ); } } printf("B on PE %d \n", k); for ( i = 0; i < m; i++ ) { printf( "Row %d \n", i ); for ( j = 0; j < m; j++ ) { mpwrite( &b(i,j,0) ); printf ( "\n" ); } } } /* mype==k */ shmem_barrier_all(); } /* for k */ /* for debugging, may just want to see the matrices and exit */ /* exit(0); */ } /* verbose>1 */ }