SUBROUTINE P8D(A,D,B,N,K,T,TIMES,AA,Z)
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Benchmark #8 -- Dynamic Program
c Small dense matrices
c
c Parameters;
c
c Provided by the calling routine:
c A = K long array of N by N matrices
c D = T long array of integers between 1 and K used to
c select the appropriate A matrix
c N = The size of the A matrices
c K = The number of A matrices
c T = The length of D, and one less than the length of B
c TIMES = Related to the formation of A
c AA = With TIMES related to the formation of A
c
c Returned by this routine:
c B = T+1 long array containing the best path
c Z = The probability of this best path
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Basic Algorithm
c
c The basic algorithm is given in the problem.
c
c There are a number of ideas to improve this algorithm. First, the
c floating point numbers involved tend to go out of bounds. This is
c solved by noting that the i which maximizes
c Y[t](j) = max Y[t-1](i)*A[k](i,j)
c also maximizes
c Log (Y[t](j)) = max Log (Y[t-1](i)) + Log (A[k](i,j))
c (The matrices are non-negative.) Thus, all work will be done in the
c log domain. Zeros in the matrices, if any, will be replaced with
c very small values.
c
c The second is the idea that we can pre-compute all possible products
c of pairs or even trios of matrices, and then double or even triple
c step through the Y vectors. Even if we do not wish to compute
c all possible trios of matrices, because of the nature of the matrices,
c they are the result of multiplying a single matrix by the K different
c vectors in array TIMES, we can still precompute A[D(t)]*A[D(t+1)]*A
c and triple step with work proportional to N*N + N.
c
c The third idea is to note that the Y vectors are not output. Thus,
c we need to keep only the last and current copies of Y at any time.
c If memory becomes a problem, we can also pack the P array, but this
c will not be done here.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
USE LIMITS
c The maximum size of the matrices and vectors
PARAMETER (MN=B8_MN)
c
c The maximum number of matrices
PARAMETER (MK=B8_MK)
c
c The maximum length of the D stream
PARAMETER (MT = B8_MT)
c
c The size of the matrices and vectors
INTEGER N
c
c The number of matrices
INTEGER K
c
c The K NxN matrices
REAL A(N,N,K)
c
c The number of steps
INTEGER T
c
c The vector specifying which of the K matrices to use in each step
INTEGER D(T)
c
c The vectors formed after each step
REAL Y(MN,0:1)
c
c The "best" path through the steps
INTEGER B(0:T)
c
c The vectors of which element of the matrix formed the maximum
INTEGER P(MN,MT)
c
c The lookup table of precomputed pseudo products of all permutations of
c the K matrices taken in groups of two or three
REAL TAB(MN,MN,MK,MK)
c
c A table of the best path through each precomputed matrix
INTEGER TABP(MN,MN,MK,MK,2)
c
c Base matrix to form the K actual matrices
REAL TIMES(N,K)
c
c Second base matrix to form the K actual matrices
REAL AA(N,N)
c
c The user-supplied minimum allowed floating point number
c PARAMETER (FPMIN=1.0E-2465)
PARAMETER (FPMIN=1.0E-307)
c
c Used to time the various subportions of this program
REAL CPUTIME,WALL,X0,Y0
c
C--CVS variable declaration
TYPE CVS
sequence
character( 160 ) string
integer stringend
END TYPE CVS
C--CVS initilaize variables
TYPE( CVS ),save :: CVS_INFO =
$ CVS("BMARKGRP $Date: 2005/01/07 23:07:10 $ $Revision: 1.2 $" //
$ "$RCSfile: p8d.f,v $ $Name: rel_5 $", 0)
c
c Check the input parameters to see that there is sufficient space.
IF(MN .LT. N) THEN
PRINT 5,MN,N
5 FORMAT('Insufficient space in working arrays for P8D.',/,
$ 'Size available from parameter MN = ',I5,/,
$ 'Needed = ',I6)
STOP
ENDIF
c
IF(MK .LT. K) THEN
PRINT 10,MK,K
10 FORMAT('Insufficient space in working arrays for P8D.',/,
$ 'Size available from parameter MK = ',I5,/,
$ 'Needed = ',I6)
STOP
ENDIF
c
IF(MT .LT. T) THEN
PRINT 15,MT,T
15 FORMAT('Insufficient space in working arrays for P8D.',/,
$ 'Size available from parameter MT = ',I10,/,
$ 'Needed = ',I10)
STOP
ENDIF
c
c
c NOTE: The code for timings and region identification are
c provided for internal instrumentation only. To use, just
c uncomment the code. Do not include these times in the final
c report.
c
c
c Convert matrices to logs and generate tables for three-stepping
c
Cc X0 = CPUTIME()
Cc Y0 = WALL()
CALL G8D(A,AA,TAB,TABP,TIMES,N,K,T,MN,MK,FPMIN)
Cc X0 = CPUTIME() - X0
Cc Y0 = WALL() -Y0
Cc PRINT 17, X0, Y0
Cc 17 FORMAT('Time to set up three-stepping tables',/,
Cc $ ' CPU = ',F12.4,' seconds.',/,
Cc $ ' Wall Clock = ',F12.4,' seconds.',/)
c
c
c Initialize the initial Y array
DO 20 I=1,N
Y(I,0) = 0
20 CONTINUE
IBUF = 1
IBUF2 = 0
c
c If T is not a multiple of 3, or T is too small to warrant triple
c stepping do a few single steps
II = T/3
II = T - II*3
IF(T .LE. (3*K*K*N)) II = T
c
c
Cc PRINT 23,II
Cc 23 FORMAT('Single step region = ',I5,//)
c
c
IF(II .NE. 0) THEN
DO 50 I=1,II
K1 = D(I)
DO 30 J=1,N
Y(J,IBUF) = Y(1,IBUF2) + A(1,J,K1)
P(J,I) = 1
30 CONTINUE
c
DO 40 J=2,N
DO 35 KK=1,N
TEMP = Y(J,IBUF2) + A (J,KK,K1)
IF(TEMP .GT. Y(KK,IBUF)) THEN
Y(KK,IBUF) = TEMP
P(KK, I) = J
ENDIF
35 CONTINUE
40 CONTINUE
c
IBUF2 = IBUF
IBUF = 1 - IBUF
50 CONTINUE
ENDIF
c
c Now do the remaining portion taking three steps at a time
IF(II .LT. T) THEN
DO 100 I=II+1,T-2,3
K1 = D(I)
K2 = D(I+1)
K3 = D(I+2)
c
DO 60 J=1,N
Y(J,IBUF) = Y(1,IBUF2) + TAB(J,1,K1,K2)
P(J,I) = 1
60 CONTINUE
c
DO 80 J=2,N
DO 70 KK = 1, N
TEMP = Y(J,IBUF2) + TAB(KK,J,K1,K2)
IF(TEMP .GT. Y(KK,IBUF)) THEN
Y(KK,IBUF) = TEMP
P(KK,I) = J
ENDIF
70 CONTINUE
80 CONTINUE
c
c Now correct for D(I+2)
DO 90 KK=1,N
Y(KK,IBUF) = Y(KK,IBUF) + TIMES(KK,K3)
90 CONTINUE
c
c Update buffer pointers
IBUF2 = IBUF
IBUF = 1 - IBUF
100 CONTINUE
ENDIF
c
c Now find the end of the best path
Z = Y(1,IBUF2)
IJ = 1
DO 110 KK=2,N
IF(Y(KK,IBUF2) .GT. Z) THEN
Z = Y(KK,IBUF2)
IJ = KK
ENDIF
110 CONTINUE
c
c Finally backtrack to generate B
B(T) = IJ
c
c Triple step region
IF(II .LT. T) THEN
DO 120 I=T-2,II+1,-3
IJ = B(I+2)
K1 = D(I)
K2 = D(I+1)
J = P(IJ,I)
B(I+1) = TABP(IJ,J,K1,K2,2)
B(I) = TABP(IJ,J,K1,K2,1)
B(I-1) = J
120 CONTINUE
ENDIF
c
c Single step region
IF(II .GT. 0) THEN
DO 130, I=II,1,-1
B(I-1) = P(B(I),I)
130 CONTINUE
ENDIF
c
RETURN
END
c
c
c
c
SUBROUTINE G8D(A,AA,TAB,TABP,TIMES,N,K,T,MN,MK,FPMIN)
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Subroutine to take the log of the A matrices, and to
c generate the tables to be used in the 3-stepping.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c The maximum size of the matrices and vectors
PARAMETER (MM=30)
REAL A(N,N,K)
REAL AA(N,N)
REAL TAB(MN,MN,MK,MK)
INTEGER TABP(MN,MN,MK,MK,2)
REAL TIMES(N,K)
REAL VTEMP(MM,MM)
INTEGER PTEMP(MM,MM)
INTEGER T
c
c Some checking to make sure that arrays are large enough.
IF(MM .LT. N) THEN
PRINT 5,MM,N
5 FORMAT('Insufficient space in working arrays for G8D.',/,
$ 'Size available from parameter MM = ',I5,/,
$ 'Needed = ',I6)
STOP
ENDIF
c
c Scale all the matrices, and the array TIMES
DO 30 J = 1,K
DO 20 I = 1,N
IF(TIMES(I,J) .GT. FPMIN) THEN
TIMES(I,J) = ALOG(TIMES(I,J))
ELSE
TIMES(I,J) = ALOG(FPMIN)
ENDIF
c
DO 10 II = 1,N
IF(A(II,I,J) .GT. FPMIN) THEN
A(II,I,J) = ALOG(A(II,I,J))
ELSE
A(II,I,J) = ALOG(FPMIN)
ENDIF
10 CONTINUE
20 CONTINUE
30 CONTINUE
c
DO 50 I=1,N
DO 40 J=1,N
IF(AA(I,J) .GT. FPMIN) THEN
AA(I,J) = ALOG(AA(I,J))
ELSE
AA(I,J) = ALOG(FPMIN)
ENDIF
40 CONTINUE
50 CONTINUE
c
c If N, K and T warrant, multiply together all K*K permutations of the
c K input matrices.
c
IF(T .LE. (3*K*K*N)) RETURN
c
DO 190 I = 1,K
DO 180 J = 1,K
DO 130 KK = 1,N
DO 100 L = 1,N
VTEMP(KK,L) = A(KK,1,I) + A(1,L,J)
PTEMP(KK,L) = 1
100 CONTINUE
c
DO 120 M = 2,N
DO 110 L = 1,N
TMP = A(KK,M,I)+A(M,L,J)
IF(TMP .GT. VTEMP(KK,L)) THEN
VTEMP(KK,L) = TMP
PTEMP(KK,L) = M
ENDIF
110 CONTINUE
120 CONTINUE
130 CONTINUE
c
c Multiply all K*K matrices in the table by the first A matrix. Then,
c to step by three, one just has to multiply by the appropriate matrix
c in the table using the first two steps as an index, and then using
c the multiplication factors of the third step to readjust the columns
c of the output.
DO 170 KK = 1,N
DO 140 L = 1,N
TAB(L,KK,I,J) = VTEMP(KK,1) + AA(1,L)
TABP(L,KK,I,J,1) = PTEMP(KK,1)
TABP(L,KK,I,J,2) = 1
140 CONTINUE
DO 160 M = 2,N
DO 150 L = 1,N
TEMP = VTEMP(KK,M) + AA(M,L)
IF(TEMP .GT. TAB(L,KK,I,J)) THEN
TAB(L,KK,I,J) = TEMP
TABP(L,KK,I,J,1) = PTEMP(KK,M)
TABP(L,KK,I,J,2) = M
ENDIF
150 CONTINUE
160 CONTINUE
170 CONTINUE
180 CONTINUE
190 CONTINUE
c
RETURN
END