SUBROUTINE P8S(A,IA,D,B,N,K,L,T,D2,Z)
c
ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Benchmark #8 -- Dynamic Program
c Large sparse matrices
c
c Parameters:
c
c Provided by the calling routine:
c A = K-long array of N by N matrices, packed into N by L arrays
c IA = N by L array of indices for the packing of A
c D = T-long array of integers between 1 and K used to
c select the appropriate A matrix
c N = The size of the A matrices
c K = The number of A matrices
c L = Number of non-zeros in each row and column of each A matrix
c T = The length of D, and one less than the length of B
c D2 = T = D1*D2, with neither of these equal to 1.
c
c Returned by this routine:
c B = T+1 long array containing the best path.
c Z = The log probability of this path
c
ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Basic Algorithm:
c
c The input matrices are assumed to be compressed along their
c columns. Thus there is an array containing the nonzero elements,
c compressed into L rows, and a same shaped array containing the
c row-numbers of these elements.
c
c The basic algorithm is then straight-forward:
c
c For i = 1,2,..,N Y[0](i) = 1.0
c For t = 1,2,...,T
c k = D(t)
c for i = 1,...,N
c Y[t](i) = A[k](i,1)*Y[t-1](IA[k](i,1))
c P[t](i) = IA[k](i,1)
c next i
c for j = 2,...,L
c for i = 1,...,N
c if A[k](i,j)*Y[t-1](IA[k](i,j)) > Y[t](i)
c Y[t](i) = A[k](i,j)*Y[t-1](IA[k](i,j))
c P[t](i) = IA[k](i,j)
c end if
c next i
c next j
c next t
c B(T) = 1
c YY = Y[T](1)
c For i = 2,...,N
c if Y[T](i) > YY
c YY = Y[T](i)
c B(T) = i
c end if
c Next i
c For t = T-1,T-2,...,1,0 set B(t) = P[t+1](B(t+1))
c
c Modifications to the Basic Algorithm:
c
c The algorithm would be as given in the Basic Algorithm above
c except for two things: the numbers tend to underflow, and
c the amount of storage required for all the Y and P arrays
c would overflow memory on any existing computer.
c
c To solve the underflow problem, note that the i which maximizes
c Y[t](j) = max Y[t-1](i)*A[k](i,j)
c also maximizes
c Log (Y[t](j)) = max Log (Y[t-1](i)) + Log (A[k](i,j))
c The matrices are non-negative. Thus, all work will be done
c in the log domain. (Zeros in the matrices, if any, will be replaced
c with very small values.)
c
c To solve the memory overflow problem, there are three options. One
c could
c a) store most of the Y and P arrays on secondary storage;
c b) compress the data so it does fit; or
c c) store only a portion of the data, from which the rest can be
c recomputed fairly quickly.
c It is this last option that is implemented here.
c
c Pick two numbers, D1 and D2, such that D1*D2=T. Then two passes can
c be made through the T steps. The first saves the Y arrays for every
c D1'th step. The second pass then takes the saved steps, starting
c with the last and working down, regenerating the P arrays for all the
c steps, and then generating the B array working backwards through
c the P's.
c
c The memory required for this solution is proportional to D1+D2, as
c opposed to D1*D2(=T). Obviously, memory used will be minimal if
c D1=D2=SQRT(T). For the parameter of T = 2000, this should fit on
c most modern high performance computers. And this is what is
c implemented here. As now coded, it is assumed that D1 * D2 = T, and
c that neither of these are equal to 1.
c
c Note that the work goes up as the number of passes. This work is
c needed to recompute the Y arrays. Thus, this requires roughly
c twice the work.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
USE LIMITS
c The number of steps
INTEGER T
c
c The number of Y array intermediate values to store in each
c dimension
INTEGER D1,D2
c
c The maximum number of steps to be done in the each subdivision
c of the last pass
PARAMETER (MD1=50)
c
c The maximum for D2
PARAMETER (MD2 = 50)
c
c The offset of the starting point of this subdivision of the steps.
INTEGER START
c
c Flag which is initialized to zero, indicating that the first time FWBW
c is called, B(T) is to be calculated. Thereafter, FIRST is set to 1
INTEGER FIRST
c
c The K NxN matrices, packed with L elements per column
REAL A(N,L,K)
c
c The indices indicating which row number of each of the L elements
c packed per column
INTEGER IA(N,L)
c
c The vector indicating which of the K matrices is to be used in each
c of the T steps
INTEGER D(T)
c
c The maximum for N
PARAMETER(MN = B8S_MN)
c
c The vectors formed after each step
REAL Y(MN,0:1)
c
c The output chain indicating the path through the Y vectors giving the
c maximum value in Y(T).
INTEGER B(0:T)
c
c The storage space for the D1 P vectors needed during each of the
c subdivisions of the last pass.
INTEGER PP(MN*MD1)
c
c The intermediate values of Y being stored at the end of each of the
c subdivisions of the first and second pass. The first D3 vectors are
c used for the first pass, and the remaining D2 for the second pass
REAL YIM(MN,MD2)
c
C--CVS variable declaration
TYPE CVS
sequence
character( 160 ) string
integer stringend
END TYPE CVS
C--CVS initilaize variables
TYPE( CVS ),save :: CVS_INFO =
$ CVS("BMARKGRP $Date: 2005/01/07 23:07:10 $ $Revision: 1.2 $" //
$ "$RCSfile: p8s.f,v $ $Name: rel_5 $", 0)
c
c
c Check the input parameters to verify that there is sufficient space
IF(MN .LT. N) THEN
PRINT 5,MN,N
5 FORMAT('Insufficient space in working arrays for P8S.',/,
$ 'Size available from parameter MN = ',I5,/,
$ 'Needed = ',I6)
STOP
ENDIF
c
D1 = T/D2
IF(MD1 .LT. D1) THEN
PRINT 10,MD1,D1
10 FORMAT('Insufficient space in working arrays for P8S.',/,
$ 'Size available from parameter MD1 = ',I8,/,
$ 'Needed = ',I9)
STOP
ENDIF
c
IF(MD2 .LT. D2) THEN
PRINT 15,MD2, D2
15 FORMAT('Insufficient space in working arrays for P8S.',/,
$ 'Size available from parameter MD2 = ',I8,/,
$ 'Needed = ',I8)
STOP
ENDIF
c
IF(T .NE. D1*D2) THEN
PRINT 20,T,D1,D2
20 FORMAT('In P8S, T must be divided evenly into partitions,',/,
$ 'so that T = D1*D2',/,
$ 'T = ',I8,/,'D1 = ',I8,/,'D2 = ',I8)
STOP
ENDIF
c
IF((D1 .EQ. 1) .OR. (D2 .EQ. 1)) THEN
PRINT 25,D1,D2
25 FORMAT('In P8S, both D1 and D2 must be greater than 1',/,
$ 'D1 = ',I8,/,'D2 = ',I8)
STOP
ENDIF
c
c
c NOTE: The code for timings are provided for internal
c instrumentation only. To use, just uncomment the code.
c Do not include these times in the final report.
c
c
c Convert the matrices to logs
c
Cc X0 = CPUTIME()
Cc Y0 = WALL()
c
CALL G8S(A,N,K,L)
c
Cc X0 = CPUTIME() - X0
Cc Y0 = WALL() -Y0
c
Cc PRINT 27, X0, Y0
Cc 27 FORMAT('Time to convert matrices to logs',/,
Cc $ ' CPU = ',F12.4,' seconds.',/,
Cc $ ' Wall Clock = ',F12.4,' seconds.',/)
c
c
c Initialize Y[1] to be all log(1.0).
DO 30 I = 1,N
YIM(I,1) = 0
30 CONTINUE
c
c Begin the first pass
c
Cc X0 = CPUTIME()
Cc Y0 = WALL()
c
c Calculate the D2-1 intermediate values spaced by D1 steps.
DO 35 I2 = 1,D2-1
CALL FW(IA,A,Y,D(D1*(I2-1)+1),YIM(1,I2),YIM(1,I2+1),
& N,K,D1,L)
35 CONTINUE
c
Cc X0 = CPUTIME() - X0
Cc Y0 = WALL() -Y0
Cc PRINT 37, X0, Y0
Cc 37 FORMAT('Time for first pass',/,
Cc $ ' CPU = ',F12.4,' seconds.',/,
Cc $ ' Wall Clock = ',F12.4,' seconds.',/)
c
c
c Begin the second pass
c
Cc X0 = CPUTIME()
Cc Y0 = WALL()
c
FIRST = 0
c
DO 40 I2 = D2,1,-1
START = D1*(I2-1)
c
c Calculate the I2'th of D2 groups of subdivisions of D1 steps,
c saving the P vector for each step, and backtrack computing B.
CALL FWBW(IA,A,Y,D(D1*(I2-1)+1),PP,B(D1*(I2-1)),
& YIM(1,I2),N,K,D1,L,FIRST,Z)
c
c After the first call to FWBW, B(T) doesn't need to be calculated
c anymore
FIRST = 1
40 CONTINUE
c
c
Cc X0 = CPUTIME() - X0
Cc Y0 = WALL() -Y0
Cc PRINT 43, X0, Y0
Cc 43 FORMAT('Time for last pass',/,
Cc $ ' CPU = ',F12.4,' seconds.',/,
Cc $ ' Wall Clock = ',F12.4,' seconds.',/)
c
RETURN
END
c
c
c
c
SUBROUTINE FW(IA,A,Y,D,IN,OUT,N,K,T,L)
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Subroutine to do subdivisions of steps for passes 1 through 3. The
c P vectors do not need to be calculated, as they will be calculated
c on pass 4. Calculates from one stored Y vector to the next.
c
ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c N, the size of the matrices
INTEGER N
c
c K, the number of matrices
INTEGER K
c
c The number of steps to do in THIS call to FW
INTEGER T
c
c The number of nonzero entries per column
INTEGER L
c
c The matrices stored in compressed form
REAL A(N,L,K)
c
c The row numbers of the elements stored in A
INTEGER IA(N,L)
c
c The matrix number to use in each of the steps
INTEGER D(T)
c
c The vectors formed after each step
REAL Y(N,0:1)
c
c The input vector for the first step
REAL IN(N)
c
c The output Y vector for the last step
REAL OUT(N)
c
c Do the first iteration from array IN to Y(,0)
DO 10 I = 1,N
Y(I,0) = IN(IA(I,1)) + A(I,1,D(1))
10 CONTINUE
c
DO 30 J = 2, L
DO 20 I=1,N
TEMP = IN(IA(I,J)) + A(I,J,D(1))
Y(I,0) = MAX(Y(I,0), TEMP)
20 CONTINUE
30 CONTINUE
IBUF = 1
IBUF2 = 0
c
c Do all but the last step, going between Y(,1) and Y(,0)
DO 70 KK = 2,T-1
c
c Need compiler directive to overcome confusion of ibuf .ne. ibuf2
CDIR$ IVDEP
c
DO 40 I = 1,N
Y(I,IBUF) = A(I,1,D(KK)) + Y(IA(I,1), IBUF2)
40 CONTINUE
c
DO 60 J = 2,L
c
c Need compiler directive to overcome confusion of ibuf .ne. ibuf2
CDIR$ IVDEP
c
DO 50 I = 1,N
TEMP = A(I,J,D(KK)) + Y(IA(I,J), IBUF2)
Y(I,IBUF) = MAX(Y(I,IBUF), TEMP)
50 CONTINUE
60 CONTINUE
c
IBUF2 = IBUF
IBUF = 1 - IBUF
c
70 CONTINUE
c
c On the last step, go between Y(,IBUF2) and OUT
DO 80 I=1,N
OUT(I) = A(I,1,D(T)) + Y(IA(I,1), IBUF2)
80 CONTINUE
c
DO 100 J = 2,L
DO 90 I = 1,N
TEMP = A(I,J,D(T)) + Y(IA(I,J), IBUF2)
OUT(I) = MAX(OUT(I), TEMP)
90 CONTINUE
100 CONTINUE
c
RETURN
END
c
c
c
c
SUBROUTINE FWBW(IA,A,Y,D,PP,B,IN,N,K,T,L,FIRST,Z)
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Subroutine to do the pass 4, including backtracking. This
c routine needs to calculate and store the P vectors in order to
c backtrack. However, it does not need to store any Y vectors.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c N, the size of the matrices
INTEGER N
c
c K, the number of matrices
INTEGER K
c
c The number of steps to do in THIS call to FW
INTEGER T
c
c The number of nonzero entries per column
INTEGER L
c
c The matrices stored in compressed form
REAL A(N,L,K)
c
c The row numbers of the elements stored in A
INTEGER IA(N,L)
c
c The matrix number to use in each of the steps
INTEGER D(T)
c
c The vectors formed after each step
REAL Y(N,0:1)
c
c The input vector for the first step.
REAL IN(N)
c
c The P vectors stored for these steps, used in backtracking to
c compute B
INTEGER PP(N,T)
c
c The output B vector for these steps (actually B(T) is input)
INTEGER B(0:T)
c
c Flag indicating whether or not this is the first call to FWBW.
c If so, it is for the final steps, and so B(T) must be calculated
c rather than received from input.
INTEGER FIRST
c
c The log probability for the most likely path
REAL Z
c
c Do the first iteration from array IN to Y(,0)
DO 10 I = 1,N
Y(I,0) = IN(IA(I,1)) + A(I,1,D(1))
PP(I,1) = IA(I,1)
10 CONTINUE
c
DO 30 J = 2, L
DO 20 I=1,N
TEMP = IN(IA(I,J)) + A(I,J,D(1))
IF (TEMP .GT. Y(I,0)) THEN
Y(I,0) = TEMP
PP(I,1) = IA(I,J)
ENDIF
20 CONTINUE
30 CONTINUE
IBUF = 1
IBUF2 = 0
c
c Do the remaining steps, going between Y(,1) and Y(,0)
DO 70 KK = 2,T
c
c Need compiler directive to overcome confusion of ibuf .ne. ibuf2
CDIR$ IVDEP
c
DO 40 I = 1,N
PP(I,KK) = IA(I,1)
Y(I,IBUF) = A(I,1,D(KK)) + Y(IA(I,1), IBUF2)
40 CONTINUE
c
DO 60 J = 2,L
c
c Need compiler directive to overcome confusion of ibuf .ne. ibuf2
CDIR$ IVDEP
c
DO 50 I = 1,N
TEMP = A(I,J,D(KK)) + Y(IA(I,J), IBUF2)
IF (TEMP .GT. Y(I,IBUF)) THEN
Y(I,IBUF) = TEMP
PP(I,KK) = IA(I,J)
END IF
50 CONTINUE
60 CONTINUE
c
IBUF2 = IBUF
IBUF = 1 - IBUF
70 CONTINUE
c
c If these are the last steps, compute B(T) by taking the maximum
c value in Y(T) and storing its position
IF (FIRST .EQ. 0) THEN
c
c Start with the first value in Y(T)
TEMP = Y(1,IBUF2)
II = 1
DO 100 I = 2,N
c
c If any following element is greater the the maximum value so far,
c store the new maximum and its position
IF (Y(I,IBUF2) .GT. TEMP) THEN
TEMP = Y(I,IBUF2)
II = I
END IF
100 CONTINUE
B(T) = II
Z = TEMP
END IF
c
c Backtrack over all the steps computed in this subroutine
DO 110 I = T-1,0,-1
B(I) = PP(B(I+1),I+1)
110 CONTINUE
c
RETURN
END
c
c
c
c
SUBROUTINE G8S(A,N,K,L)
ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c Subroutine to convert A matrices to logs, to eliminate the need to
c rescale.
c
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
c
c The A matrices.
REAL A(N,L,K)
c
c The user-supplied minimum allowed floating point number
c PARAMETER (FPMIN=1.0E-2465)
c PARAMETER (FPMIN=2.2E-308)
PARAMETER (FPMIN=1.0E-307)
c
c Scale each matrix
DO 30 I = 1,K
DO 20 J = 1,L
DO 10 KK =1,N
IF(A(KK,J,I) .LT. FPMIN) A(KK,J,I) = FPMIN
A(KK,J,I) = ALOG(A(KK,J,I))
10 CONTINUE
20 CONTINUE
30 CONTINUE
c
RETURN
END