PROGRAM M11 c cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c c Benchmark #11 -- Multiple Precision Arithmetic c c Given two NxN matrices with entries in the ring of integers modulo c some number MODULUS, compute the product of the matrices. c c Default Parameters: N = 225 MODULUS = (7**183) - 6 c ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c c Main Program for Benchmark #11 c c Call time parameters: c c -v = Verbose mode, invokes printing of intermediate information. c -V = Very verbose mode. c c N = The size of the matrices c Min allowed = 4 c Max allowed = 512 c Default = 225 c c MODULUS = The modulus c Max allowed = 1024 bits c Default = 7^183-6 (514 bits) c A "-" indicates reading the modulus from standard-in. c c Usage: c bench11 32 123456789 (32x32 matrices modulo 123456789) c bench11 N MODULUS (NxN matrices modulo MODULUS) c bench11 N (use the default value for MODULUS) c bench11 N 0 (use the default value for MODULUS) c bench11 0 0 (use the default values for N, MODULUS) c bench11 (use the default values for N, MODULUS) c bench11 N i (i=1,2,...,10) uses the modulus Mi from the list below c c echo "2^512-1" | bc | bench11 100 - (100x100 matrices mod 2^512-1) c cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c c The check routine is designed to verify the results for any c combination of N and MODULUS from the following list: c c N = 5, 25, 100, 225, 256, 512 c c MODULUS: c M1 = 7^23-6 (65 bits) c M2 = 2^89-1 (89 bits) c M3 = 2^127-1 (127 bits) c M4 = 3^121-2 (192 bits) c M5 = 9^81-8 (257 bits) c M6 = 5^166-4 (386 bits) c M7 = 2^512-1 (512 bits) c M8 = 7^183-6 (514 bits) (the default) c M9 = 3^477-2 (757 bits) c M10 = 5^430-4 (999 bits) c c Other combinations cannot be verified. c ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c c Note that this benchmark assumes the existence of a preprocessor. c If the particular architecture does not support the *.F file type, c appropriate *.f files can be generated with any c preprocessor on c a workstation and need not be generated on the actual architecture. c This approach allows one to tailor the *.h files optimally for that c particular architecture. c ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c IMPLICIT NONE c c This includes an architecture-specific .h file c #include "bench11.h" c c Arrays to hold the matrices c MPLONG A( :, :, : ) MPLONG B( :, :, : ) MPLONG C( :, :, : ) ALLOCATABLE A, B, C c c An array to hold the modulus c MPLONG MODULUS(MAXMODSIZE) c c An array to hold the 10 checkable moduli c MPLONG DEFMODS(33,10) INTEGER DEFMODNUM c #if BITSPERWORD == 23 c Data for a machine with 23 bits per multiple precision word (eg CRAY) DATA (DEFMOD(I),I=1,24) / 23, * 2345073, 6268386, 7122414, 967733, 7782757, 4336732, * 7036344, 5860704, 2506226, 8003024, 6255620, 7518812, * 4150431, 729597, 25318, 660986, 8294841, 4294177, * 4269410, 381337, 7312422, 5587860, 214 / #endif #if BITSPERWORD == 32 c Data for a machine with 32 bits per multiple precision word DATA ( DEFMODS(I,1), I = 1, 4 ) / 3, * Z'30F0A771', Z'7BD152B3', 1 / DATA ( DEFMODS(I,2), I = 1, 4 ) / 3, * Z'FFFFFFFF', Z'FFFFFFFF', Z'01FFFFFF' / DATA ( DEFMODS(I,3), I = 1, 5 ) / 4, * Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', Z'7FFFFFFF' / DATA ( DEFMODS(I,4), I = 1, 7 ) / 6, * Z'12CACD21', Z'22D2F6C3', Z'92AB1311', Z'9D135AC8', * Z'CD3D4156', Z'DBDCFD23' / DATA ( DEFMODS(I,5), I = 1, 10 ) / 9, * Z'F8DB7C81', Z'786C0065', Z'95D05DC0', Z'5CC1C941', * Z'A7987CBA', Z'D6FD8182', Z'278B4D09', Z'B2B6F77A', 1 / DATA ( DEFMODS(I,6), I = 1, 14 ) / 13, * Z'5BB45685', Z'2F03B89A', Z'0C238E65', Z'D0979D30', * Z'7D1B506D', Z'334B46E8', Z'F63B8E74', Z'DC82E031', * Z'6842B9A8', Z'BD586A9E', Z'80152BA6', Z'B69CB330', 2 / DATA ( DEFMODS(I,7), I = 1, 17 ) / 16, * Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', * Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', * Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', * Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF', Z'FFFFFFFF' / DATA ( DEFMODS(I,8), I = 1, 18 ) / 17, * Z'F123C871', Z'2B7BAFD2', Z'51D886BB', Z'62E76C16', * Z'AD76E211', Z'F2B2DAC1', Z'0EE8263D', Z'97DD013D', * Z'49FE574B', Z'590FEBF5', Z'F4018B98', Z'91B9142B', * Z'A0C310FE', Z'33304958', Z'F94260BA', Z'5AAA1CA6', 3 / DATA ( DEFMODS(I,9), I = 1, 25 ) / 24, * Z'BFB72E91', Z'3D890F36', Z'A692A7BB', Z'FCA7523A', * Z'BD1C5E4D', Z'88CEDE98', Z'19C47C10', Z'302879A8', * Z'98D73E3D', Z'1C54BA18', Z'7EFE1607', Z'5D42F516', * Z'5290AE47', Z'4D35BA75', Z'C2A13902', Z'F567F6AB', * Z'5C5D0361', Z'66153C40', Z'F9DC5FB4', Z'88E1811F', * Z'7923A048', Z'DDCAF0C6', Z'6CCF51C0', Z'104DB4' / DATA ( DEFMODS(I,10), I = 1, 33 ) / 32, * Z'22E30765', Z'C81109B4', Z'C07755A8', Z'BAE3EC77', * Z'AB13676F', Z'4E263E53', Z'3D1E4D7B', Z'D2E5D070', * Z'5E140CBB', Z'79C1E1FB', Z'361F35F8', Z'826C9A87', * Z'53A3D469', Z'DA1DFDE4', Z'FFCDA2F1', Z'9BC1690A', * Z'D6468B5D', Z'856A8777', Z'79E33E50', Z'83412B3C', * Z'3391E087', Z'D6C198C0', Z'04D38D4C', Z'34AF758C', * Z'B09D292C', Z'E3E3BECA', Z'132CF817', Z'8BF9D3BB', * Z'837B535A', Z'1C6AFDFA', Z'2B040960', Z'56' / #endif #if BITSPERWORD == 64 c Data for a machine with 64 bits per multiple precision word DATA ( DEFMODS(I,1), I = 1, 3 ) / 2, * Z'7BD152B330F0A771', 1 / DATA ( DEFMODS(I,2), I = 1, 3 ) / 2, * Z'FFFFFFFFFFFFFFFF', Z'01FFFFFF' / DATA ( DEFMODS(I,3), I = 1, 3 ) / 2, * Z'FFFFFFFFFFFFFFFF', Z'7FFFFFFFFFFFFFFF' / DATA ( DEFMODS(I,4), I = 1, 4 ) / 3, * Z'22D2F6C312CACD21', Z'9D135AC892AB1311', * Z'DBDCFD23CD3D4156' / DATA ( DEFMODS(I,5), I = 1, 6 ) / 5, * Z'786C0065F8DB7C81', Z'5CC1C94195D05DC0', * Z'D6FD8182A7987CBA', Z'B2B6F77A278B4D09', 1 / DATA ( DEFMODS(I,6), I = 1, 8 ) / 7, * Z'2F03B89A5BB45685', Z'D0979D300C238E65', * Z'334B46E87D1B506D', Z'DC82E031F63B8E74', * Z'BD586A9E6842B9A8', Z'B69CB33080152BA6', 2 / DATA ( DEFMODS(I,7), I = 1, 9 ) / 8, * Z'FFFFFFFFFFFFFFFF', Z'FFFFFFFFFFFFFFFF', * Z'FFFFFFFFFFFFFFFF', Z'FFFFFFFFFFFFFFFF', * Z'FFFFFFFFFFFFFFFF', Z'FFFFFFFFFFFFFFFF', * Z'FFFFFFFFFFFFFFFF', Z'FFFFFFFFFFFFFFFF' / DATA ( DEFMODS(I,8), I = 1, 10 ) / 9, * Z'2B7BAFD2F123C871', Z'62E76C1651D886BB', * Z'F2B2DAC1AD76E211', Z'97DD013D0EE8263D', * Z'590FEBF549FE574B', Z'91B9142BF4018B98', * Z'33304958A0C310FE', Z'5AAA1CA6F94260BA', 3 / DATA ( DEFMODS(I,9), I = 1, 13 ) / 12, * Z'3D890F36BFB72E91', Z'FCA7523AA692A7BB', * Z'88CEDE98BD1C5E4D', Z'302879A819C47C10', * Z'1C54BA1898D73E3D', Z'5D42F5167EFE1607', * Z'4D35BA755290AE47', Z'F567F6ABC2A13902', * Z'66153C405C5D0361', Z'88E1811FF9DC5FB4', * Z'DDCAF0C67923A048', Z'00104DB46CCF51C0' / DATA ( DEFMODS(I,10), I = 1, 17 ) / 16, * Z'C81109B422E30765', Z'BAE3EC77C07755A8', * Z'4E263E53AB13676F', Z'D2E5D0703D1E4D7B', * Z'79C1E1FB5E140CBB', Z'826C9A87361F35F8', * Z'DA1DFDE453A3D469', Z'9BC1690AFFCDA2F1', * Z'856A8777D6468B5D', Z'83412B3C79E33E50', * Z'D6C198C03391E087', Z'34AF758C04D38D4C', * Z'E3E3BECAB09D292C', Z'8BF9D3BB132CF817', * Z'1C6AFDFA837B535A', Z'562B040960' / c #endif C--CVS variable declaration TYPE CVS sequence character( 160 ) string integer stringend END TYPE CVS C--CVS initilaize variables TYPE( CVS ),save :: CVS_INFO = $ CVS("BMARKGRP $Date: 2005/01/10 20:46:00 $ $Revision: 1.2 $" // $ "$RCSfile: m11.F,v $ $Name: rel_5 $", 0) c c Character variables to hold command line arguments c CHARACTER*10 ARG1 CHARACTER*310 ARG2 c c Variables for timing purposes c DOUBLE PRECISION CPUTIME, WALL, X0, Y0 DOUBLE PRECISION CSET, WSET, CRUN, WRUN, CCHECK, WCHECK c c Other variables c INTEGER NN, N, Q, M, NUMSIZE, MODBITS INTEGER I, ERR #ifdef CRAY INTEGER NUMARGS, IPXFARGC, ARG_LEN, G_ARG_ERR #else INTEGER NUMARGS, IARGC #endif MPLONG MODSIZE c c If bench11 is run on a machine without a leading-zero intrinsic, c must use the software version of LEADZ(n), found in util.f. c #ifdef SW_LEADZ CALL INITLZ() #endif c c Get input parameters c #ifdef CRAY NUMARGS = IPXFARGC() #else NUMARGS = IARGC() #endif c c Read the command line arguments c IF (NUMARGS .GE. 1) THEN #ifdef CRAY CALL PXFGETARG(1, ARG1, ARG_LEN, G_ARG_ERR) #else CALL GETARG(1, ARG1) #endif c IF((ARG1 .EQ. "-v") .OR. (ARG1 .EQ. "-V")) THEN IF(ARG1 .EQ. "-v") THEN VERBOSE = 1 ELSE VERBOSE = 2 ENDIF NUMARGS = NUMARGS - 1 IF (NUMARGS .GE. 1) THEN #ifdef CRAY CALL PXFGETARG(2, ARG1, ARG_LEN, G_ARG_ERR) #else CALL GETARG(2, ARG1) #endif ENDIF IF (NUMARGS .GE. 2) THEN #ifdef CRAY CALL PXFGETARG(3, ARG2, ARG_LEN, G_ARG_ERR) #else CALL GETARG(3, ARG2) #endif ENDIF c ELSE c VERBOSE = 0 IF (NUMARGS .GE. 2) THEN #ifdef CRAY CALL PXFGETARG(2, ARG2, ARG_LEN, G_ARG_ERR) #else CALL GETARG(2, ARG2) #endif ENDIF c ENDIF c ENDIF c c ARG1 = the size of the data c IF (NUMARGS .GE. 1) THEN READ(ARG1, 5) N 5 FORMAT(I5) ELSE N = 0 ENDIF c c Default c IF(N .LE. 0) N = 225 c c Check for in range c IF(N .LT. 4) THEN PRINT 15, N 15 FORMAT("Minimum allowed value for N is 4, N = ", I5) STOP ENDIF c IF(N .GT. MAXMATSIZE) THEN PRINT 25, MAXMATSIZE, N 25 FORMAT("Maximum value for N is ", I5, ", N = ", I5) STOP ENDIF c c Prepare the structure for the modulus c MODULUS(MPALLOC) = MAXMODSIZE-MPHL #if (GMP32 == 0) MODULUS(MPSIZE) = 0 #endif #ifdef GMP MODULUS(MPPTR) = LOC ( MODULUS(MPDATA) ) #endif c c ARG2 = the modulus c IF (NUMARGS .GE. 2) THEN IF (ARG2(1:1) .EQ. "-") THEN IF(VERBOSE .GT. 0) PRINT*, "Reading MODULUS from stdin:" CALL MPREAD(MODULUS) ELSE CALL MPFROMSTRING(MODULUS, ARG2) ENDIF ENDIF c #if (GMP32 == 0) MODSIZE = MODULUS(MPSIZE) #else MODSIZE = ISHFT( MODULUS(1), -32 ) #endif c c Check for default modulus or one of the 10 checkable moduli c DEFMODNUM = 0 IF (MODSIZE .EQ. 0) THEN DEFMODNUM = 8 ELSE IF ( (MODSIZE .EQ. 1) .AND. (MODULUS(MPDATA) .LE. 10) ) THEN DEFMODNUM = MODULUS(MPDATA) IF (DEFMODNUM .EQ. 0) DEFMODNUM = 8 ENDIF ENDIF IF (DEFMODNUM .NE. 0) THEN MODSIZE = DEFMODS(1,DEFMODNUM) DO I = 1, MODSIZE MODULUS(MPDATA+I-1) = DEFMODS(1+I,DEFMODNUM) ENDDO ENDIF c NUMSIZE = MODSIZE + MPHL #if (GMP32 == 0) MODULUS(MPALLOC) = MODSIZE MODULUS(MPSIZE) = MODSIZE #else MODULUS(MPALLOC) = MODSIZE + ISHFT( MODSIZE, 32 ) #endif c c Calculate number of bits in the modulus; start with highest limb c MODBITS = WORDLEN - LEADZ( MODULUS(NUMSIZE) ) c c Now add total bits in lower-order limbs c MODBITS = MODBITS + BITSPERWORD * (MODSIZE-1) c cccccccccccccc VERBOSE Output ccccccccccccccc IF(VERBOSE .GT. 0) THEN PRINT*, "Using a modulus of: " CALL MPWRITE(MODULUS) PRINT*, " of ", MODBITS, " bits" PRINT*, "We are multiplying two ", N, "x", N, " matrices" ENDIF ccccccccccccccccccccccccccccccccccccccccccccccc c c S11 builds the "random" matrices A and B c X0 = CPUTIME() Y0 = WALL() c c Allocate the matrices A, B c ALLOCATE ( A(NUMSIZE,N,N), B(NUMSIZE,N,N) ) c IF(VERBOSE .GT. 0) PRINT*, "Calling subroutine S11" c CALL S11 ( A, B, N, NUMSIZE, MODBITS ) c IF(VERBOSE .GT. 0) PRINT*, "Returned from subroutine S11" c c Allocate the matrix C c ALLOCATE ( C(NUMSIZE,N,N) ) c CSET = CPUTIME() - X0 WSET = WALL() - Y0 c c P11 does all the real work c X0 = CPUTIME() Y0 = WALL() IF(VERBOSE .GT. 0) PRINT*, "Calling subroutine P11" c CALL P11 ( A, B, C, MODULUS, N, NUMSIZE ) c IF(VERBOSE .GT. 0) PRINT*, "Returned from subroutine P11" c CRUN = CPUTIME() - X0 WRUN = WALL() - Y0 c c C11 checks the results c X0 = CPUTIME() Y0 = WALL() c IF(VERBOSE .GT. 0) PRINT*, "Calling subroutine C11" c CALL C11 ( C, MODULUS, N, NUMSIZE, MODBITS, ERR ) c IF(VERBOSE .GT. 0) PRINT*, "Returned from subroutine C11" c CCHECK = CPUTIME() - X0 WCHECK = WALL() - Y0 c c R11 prints results c IF(VERBOSE .GT. 0) PRINT*, "Calling subroutine R11" c CALL R11 ( C, MODULUS, N, NUMSIZE, ERR, * CSET, WSET, CRUN, WRUN, CCHECK, WCHECK ) c IF(VERBOSE .GT. 0) PRINT*, "Returned from subroutine R11" c STOP END