c ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c  CVS Info
c     \$Date: 2005/01/10 21:55:51 \$
c     \$Revision: 1.2 \$
c     \$RCSfile: m8dp_pre.f,v \$
c     \$Name: rel_5 \$
c
c  Benchmark #8 -- Dynamic Program
c
c  Small dense matrices (Part a)
c
c  Assumption: All values in the A matrices are nonnegative
c              (Since these are state transition probabilities,
c              this is a reasonable assumption.)
c
c  The Problem:
c
c  Given:  A[1],A[2],...A[K], N x N floating point matrices
c          D, an 8-bit integer vector of length T where
c             1 <= D(t) <= K  for t = 1,2,...T
c
c  Calculate:
c          Y[0],Y[1],...Y[T]  64-bit floating point vectors of
c                             length N
c          P[1],P[2],...P[T]  32-bit integer vectors of length N
c          B, a T+1 long 32-bit integer vector where
c             1 <= B(t) <= N  for t = 0,1,2,...T
c  according to the following algorithm.
c
c     Y[0](i) = 1 for i = 1,2,...N
c     For t = 1,2,...,T
c        k = D(t)
c        For j = 1,2,...,N
c           Y[t](j) = max Y[t-1](i)*A[k](i,j)
c                      i
c           P[t](j) = any i which maximized Y[t](j)
c        Next j
c     Next t
c
c     Let Z = max Y[T](i), and let I be any i which maximized Z.
c              i
c
c     Set B(T) = I
c     For t = T-1,T-2,...,1,0
c        Set B(t) = P[t+1](B(t+1))
c     Next t
c
c     Output Z and B(t) for t = 0,1,2,...T.
c
c  Parameters:  N = 10   K = 30   T = 100,000   ITER = 400
c
c ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c  Main Program for the small dense matrix version of Benchmark #8
c
c  Call time parameters:
c
c     N    = Size of the A matrices
c            Maximum = 30
c            Default = 10
c
c     K    = Number of A matrices
c            Maximum = 50
c            Default = 30
c
c     T    = Length of D array
c            Maximum = Default = 100 000
c
c     ITER = Number of times to repeat the experiment
c            Maximum = 1000
c            Default = 400
c
c  A parameter that is missing, non-positive, or not an integer
c  takes its default value.  N, K or T above its maximum ends the
c  run; ITER above its maximum is reset to the default.
c
c  NOTE: In order to remove the complexity of using IO
c        to secondary storage, this benchmark is repeated ITER
c        times (on different data each time) and the time is
c        accumulated.  This will NOT be considered a legitimate
c        loop for parallelization, since it is done ONLY to reduce
c        the complexity of the problem.
c
c ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
      PROGRAM M8D
      IMPLICIT NONE

C--Include for mpi header file
      include 'mpif.h'

C--CVS variable declaration
      TYPE CVS
         sequence
         character( 160 ) string
         integer stringend
      END TYPE CVS

C--CVS initialize variables
C--(the pieces concatenate to the original single CVS id string)
      TYPE( CVS ), SAVE :: CVS_INFO =
     &  CVS("BMARKGRP \$Date: 2005/01/10 21:55:51 \$ " //
     &      "\$Revision: 1.2 \$" //
     &      "\$RCSfile: m8dp_pre.f,v \$ \$Name: rel_5 \$", 0)

C--Timer functions (defined elsewhere) and accumulated times:
C--CSET/WSET = cpu/wall time of setup, CRUN/WRUN = cpu/wall of P8D
      REAL CPUTIME, WALL
      REAL X0, Y0, CSET, WSET, CRUN, WRUN
      REAL ALLTIMES(14)

C--The maximum size of the matrices and vectors
      INTEGER MN
      PARAMETER (MN = 30)
C--The maximum number of matrices
      INTEGER MK
      PARAMETER (MK = 50)
C--The maximum length of the D array
      INTEGER MT
      PARAMETER (MT = 100000)

C--Which "A" matrix to use in each step.
      INTEGER D(MT)
C--The matrices
      REAL A(MN*MN*MK*2)
C--The best path
      INTEGER B(0:MT)
C--The scaling factor which to multiply the columns of AA by in
C--order to obtain the A matrices
      REAL TIMES(MN*MK)
      REAL AA(MN*MN)

C--Input parameters - T_PARAM holds the original T value
      INTEGER T, T_PARAM, N, K, ITER
C--Iteration counter and result of the random number seeding call
      INTEGER JTER, I
C--Random number generator (same type it had under implicit typing)
      INTEGER IRAND
C--Result of P8D (Z) and value produced by the check in C8D (ZZ).
C--NOTE(review): these are default REAL, exactly as under the
C--former implicit typing; confirm this matches the dummy
C--arguments of P8D, C8D and R8D, which are not visible here.
      REAL Z, ZZ

C--Error flag array
C--change size of array from 3 to 1000 for MPI version
      INTEGER IER(1000)

C--The number of processors
      INTEGER NUMPES
C--My processor id
      INTEGER MYPE
C--Error (last argument in MPI call)
      INTEGER IERR

C--Declare the IONODE
      INTEGER IONODE
      common /MPISTUFF/IONODE

C--Initialize the program for MPI
      CALL MPI_INIT(IERR)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NUMPES, IERR)
      CALL MPI_COMM_RANK(MPI_COMM_WORLD, MYPE, IERR)

C--Set the IONODE to 0
      IONODE = 0

C--GET AND TEST INPUT PARAMETERS
C--Every PE parses the command line itself.  On a range error the
C--IONODE reports it and every PE shuts MPI down before stopping,
C--instead of exiting with MPI still active.  This relies on all
C--PEs seeing the same arguments, as the rest of the program does.

C--Get N, the matrix size
      N = IARGV(1)
      IF (N .LE. 0) N = 10
      IF (N .GT. MN) THEN
         IF (MYPE .EQ. IONODE) PRINT 15, MN, N
 15      FORMAT('Maximum allowed value for N is ',I5,' N = ',I5)
         CALL MPI_FINALIZE(IERR)
         STOP
      ENDIF

C--Get K, the number of matrices
      K = IARGV(2)
      IF (K .LE. 0) K = 30
      IF (K .GT. MK) THEN
         IF (MYPE .EQ. IONODE) PRINT 25, MK, K
 25      FORMAT('Maximum allowed value for K is ',I5,' K = ',I5)
         CALL MPI_FINALIZE(IERR)
         STOP
      ENDIF

C--Get T, the length of the D array
      T = IARGV(3)
      IF (T .LE. 0) T = 100000
      IF (T .GT. MT) THEN
         IF (MYPE .EQ. IONODE) PRINT 35, MT, T
 35      FORMAT('Maximum allowed value for T is ',I9,' T = ',I9)
         CALL MPI_FINALIZE(IERR)
         STOP
      ENDIF

C--Change the input value T so it is divisible by 3*NUMPES
C--T_PARAM maintains T's original value
      T_PARAM = T
      T = (T / (3*NUMPES)) * (3*NUMPES)
      IF (MYPE .EQ. IONODE) THEN
         PRINT 40
 40      FORMAT(/,'Modify T so it is evenly divisible by 3*NUMPES')
         PRINT 45, T
 45      FORMAT('T is now ',I10)
      ENDIF

C--Get ITER, the number of iterations of this benchmark to do
      ITER = IARGV(4)
      IF (ITER .LE. 0) ITER = 400
C--An out-of-range ITER falls back to the default
      IF (ITER .GT. 1000) ITER = 400

C--START THE ITERATIONS
      IF (MYPE .EQ. IONODE) THEN
         PRINT 50, N, K, T, ITER
 50      FORMAT('Benchmark 8 running ... N,K,T,ITER = ',2I4,I8,I4)
      ENDIF

C--Initialize the random number generator for setup on all PEs
      I = IRAND(-99908)

C--Initialize timing variables
      CSET = 0.0
      WSET = 0.0
      CRUN = 0.0
      WRUN = 0.0
      ALLTIMES = 0.0

C--C8D runs on the IONODE only, but R8D receives ZZ and IER on
C--every PE, so give them a defined value everywhere.
      ZZ = 0.0
      IER = 0

C--For each iteration of the benchmark
      DO JTER = 1, ITER

         IF (MYPE .EQ. IONODE) THEN
            PRINT 55, JTER, ITER
 55         FORMAT('Benchmark 8 ... begin iteration ',I5,' of ',I5)
         ENDIF

C--Generate D, and matrices A, and array TIMES
C--Setup is done on all processors - cuts out the broadcasts
         X0 = CPUTIME()
         Y0 = WALL()
         CALL S8D(N, K, T_PARAM, D, A, TIMES, AA)
         CSET = CSET + (CPUTIME() - X0)
         WSET = WSET + (WALL() - Y0)

C--Time the actual work being done in subroutine P8D
         X0 = CPUTIME()
         Y0 = WALL()
         CALL P8D(MYPE,NUMPES,N,K,T_PARAM,T,A,D,B,TIMES,AA,Z,
     &            ALLTIMES)
         CRUN = CRUN + (CPUTIME() - X0)
         WRUN = WRUN + (WALL() - Y0)

      ENDDO

C--Check the results if the IONODE
C--(after the loop JTER = ITER+1, so JTER-1 is the iteration count)
      IF (MYPE .EQ. IONODE) THEN
         CALL C8D(N,K,T_PARAM,D,A,B,Z,ZZ,JTER-1,IER)
      ENDIF

C--Output results (R8D is called on every PE)
      ALLTIMES(1) = CSET
      ALLTIMES(2) = WSET
      ALLTIMES(3) = CRUN
      ALLTIMES(4) = WRUN
      CALL R8D(N,K,T_PARAM,ITER,MYPE,NUMPES,B,Z,ZZ,ALLTIMES,IER)

C--No explicit exit on an MPI program; shut MPI down instead
      CALL MPI_FINALIZE(IERR)

      CONTAINS

C--Return command-line argument number IPOS as an integer.
C--The result is 0 (which every caller maps to its default) when
C--the argument is absent, blank, or not a valid integer; in the
C--last case the IONODE prints a warning.  Only the first 15
C--characters of the argument are examined.
      INTEGER FUNCTION IARGV(IPOS)
      INTEGER, INTENT(IN) :: IPOS
      CHARACTER*80 TXT
      INTEGER TXTLEN, GERR, IOS

C--Blank the buffer so an argument that is not supplied reads as 0
      TXT = ' '
      GERR = 0
#ifdef POSIX
      CALL PXFGETARG(IPOS,TXT,TXTLEN,GERR)
      IF (GERR .NE. 0) TXT = ' '
#else
      CALL GETARG(IPOS,TXT)
#endif
      READ(TXT,'(I15)',IOSTAT=IOS) IARGV
      IF (IOS .NE. 0) THEN
         IF (MYPE .EQ. IONODE) PRINT 905, IPOS
 905     FORMAT('Argument ',I2,' is not an integer; using default')
         IARGV = 0
      ENDIF
      END FUNCTION IARGV

      END PROGRAM M8D