// =============================================================================================
// Chapel implementation of the STREAM benchmark.
//
// This implementation uses Chapel's data parallel model.
//
// This source was informed by the "official" version from McCalpin as well as that distributed
// with the Chapel compiler, version 0.5.
//
// Here are the instructions from the McCalpin version:
//
//   1) Stream requires a good bit of memory to run. Adjust the
//      value of 'N' (below) to give a 'timing calibration' of
//      at least 20 clock-ticks. This will provide rate estimates
//      that should be good to about 5% precision.
//
// Note that variables qualified with 'config' may be changed at runtime,
// by executing as "a.out --<name>=<value>".
// Also note that ./a.out --help lists this and other information.
// =============================================================================================

use Time, Types;

param NumVectors = 3;           // Number of data arrays (A, B, C).
type  elemType   = int ( 32 );  // Element type of the data arrays.
param GIGABYTE   = 1.0e+9;      // Bytes per (decimal) gigabyte.

config const Datatype   = "INTEGER(32)",
             Debug      = false,     // Debugging data.
             ialpha     = 3,         // Scalar used in computations.
             initBval   = 3,         // Array B initialization value.
             initCval   = 5,         // Array C initialization value.
             N          = 50000000,  // Array dimension.
             NumIter    = 10,        // Number of iterations per test.
             NumThreads = 1,         // Number of threads.
             Tolerance  = 0;         // Error tolerance.

// Actual number of elements in each data array.
const ArrayLength = N * NumThreads;

const ArrayShape : domain ( 1 ) distributed ( Block ) = [ 1..ArrayLength ];

// ---------------------------------------------------------------------------------------------
// main: runs the four STREAM kernels (copy, scale, sum, triad) NumIter times each, timing every
// iteration with getCurrentTime, then verifies and reports each kernel's results.
// ---------------------------------------------------------------------------------------------

def main ( ) {

  var A, B, C                        // Data arrays.
            : [ArrayShape] elemType;

  var times  : [1..NumIter] real,    // Array of total times for tests.
      t0     : real,                 // Timer start value.
      Answer : bool;                 // Correctness check return value.

  // ---------------------
  // Executable Statements
  // ---------------------

  ReportHeader ( );

  InitializeArrays ( B, C );

  // ================================ Copy: A ( I ) = B ( I ) ===============================

  for iter in 1..NumIter {
    t0 = getCurrentTime ( );
    A = B;
    times ( iter ) = getCurrentTime ( ) - t0;
  }

  writeln ( );
  writeln ( " ============================================================================== " );

  Answer = CheckAnswer ( "COPY", A, B, C );
  ReportResults ( "COPY", Answer, times );

  // =========================== Scale: A ( I ) = ialpha * B ( I ) ===========================

  for iter in 1..NumIter {
    t0 = getCurrentTime ( );
    A = ialpha * B;
    times ( iter ) = getCurrentTime ( ) - t0;
  }

  Answer = CheckAnswer ( "SCALE", A, B, C );
  ReportResults ( "SCALE", Answer, times );

  // ============================ Sum: A ( I ) = B ( I ) + C ( I ) =========================

  for iter in 1..NumIter {
    t0 = getCurrentTime ( );
    A = B + C;
    times ( iter ) = getCurrentTime ( ) - t0;
  }

  Answer = CheckAnswer ( "SUM", A, B, C );
  ReportResults ( "SUM", Answer, times );

  // ======================= Triad: A ( I ) = B ( I ) + ialpha * C ( I ) =====================

  for iter in 1..NumIter {
    t0 = getCurrentTime ( );
    A = B + ialpha * C;
    times ( iter ) = getCurrentTime ( ) - t0;
  }

  Answer = CheckAnswer ( "TRIAD", A, B, C );
  ReportResults ( "TRIAD", Answer, times );

  writeln ( " End STREAM test." );
  writeln ( " ============================================================================ " );
  writeln ( );
}

// ========================================= End main ========================================

// ====================================== Utility functions ====================================

// ---------------------------------------------------------------------------------------------
// ReportHeader: prints the benchmark banner and the run configuration (datatype label, element
// size, array length, total memory for the NumVectors data arrays, iteration count).
// ---------------------------------------------------------------------------------------------

def ReportHeader ( ) {

  // Dividing by GIGABYTE (a real) first keeps the byte count out of integer arithmetic, so it
  // cannot overflow for large N.
  const totalGBytes = NumVectors * numBytes ( elemType ) * ( ArrayLength / GIGABYTE );

  writeln ( );
  writeln ( " ============================================================================ " );
  writeln ( );
  writeln ( " STREAM benchmark testing. " );
  writeln ( );
  writeln ( " using the Chapel programming language, data parallel model. " );
  writeln ( );
  writeln ( " Test details: " );
  writeln ( );
  writeln ( " Datatype : ", Datatype );
  writeln ( " Bytes per array element : ", numBytes ( elemType ) );
  writeln ( " Array dimension : ", ArrayLength, " elements." );
  writeln ( " Total memory requirement : ", totalGBytes, " GBytes." );
  writeln ( );
  writeln ( " Number of iterations : ", NumIter );
}

// ---------------------------------------------------------------------------------------------
// InitializeArrays: fills B with initBval and C with initCval; when Debug is set, also prints
// both arrays.
// ---------------------------------------------------------------------------------------------

def InitializeArrays ( B, C ) {

  B = initBval;
  C = initCval;

  if ( Debug ) {
    writeln ( " B = [", B, "]" );
    writeln ( );
    writeln ( " C = [", C, "]" );
  }
}

// ======================================== Verify results =====================================

// ---------------------------------------------------------------------------------------------
// CheckAnswer: verifies the result array A of the kernel named by 'test' against the inputs
// B and C.
//
//   test    : kernel name: "COPY", "SCALE", "SUM" (or its alias "ADD"), or "TRIAD".
//   A, B, C : the data arrays, indexed over ArrayShape.
//
// Returns true when the infinity norm of (A - expected) is within Tolerance, and false
// otherwise, including when 'test' is not a recognized kernel name.
// ---------------------------------------------------------------------------------------------

def CheckAnswer ( test, A, B, C ) {

  select ( test ) {

    when "COPY" do {
      // Element-wise difference: comparing the two norms alone would accept any A whose
      // largest magnitude does not exceed B's.
      const infNorm = max reduce [ i in ArrayShape ] abs ( A ( i ) - B ( i ) );
      return ( infNorm <= Tolerance );
    }

    when "SCALE" do {
      const infNorm = max reduce [ i in ArrayShape ] abs ( A ( i ) - ( ialpha * B ( i ) ) );
      return ( infNorm <= Tolerance );
    }

    // main requests this check as "SUM"; "ADD" is kept as an alias for existing callers.
    when "SUM", "ADD" do {
      // The kernel is A = B + C, so A is the array being checked.
      const infNorm = max reduce [ i in ArrayShape ] abs ( A ( i ) - ( B ( i ) + C ( i ) ) );
      return ( infNorm <= Tolerance );
    }

    when "TRIAD" do {
      const infNorm = max reduce [ i in ArrayShape ]
                        abs ( A ( i ) - ( B ( i ) + ialpha * C ( i ) ) );
      return ( infNorm <= Tolerance );
    }

    // An unknown kernel name cannot be verified, so report failure.
    otherwise {
      return false;
    }

  } // End select ( test ).
}

// ======================================== Print results ======================================

// ---------------------------------------------------------------------------------------------
// ReportResults: prints the pass/fail status plus timing and bandwidth statistics for one
// kernel.
//
//   test     : kernel name, used only in the printed labels.
//   ierr     : result of CheckAnswer; true means the kernel passed verification.
//   TimeWall : per-iteration wall-clock times in seconds, indexed 1..NumIter.
// ---------------------------------------------------------------------------------------------

def ReportResults ( test, ierr, TimeWall ) {

  // Time stats:

  const TimeTotal = + reduce TimeWall;
  const TimeMean  = TimeTotal / NumIter;
  const TimeMax   = max reduce TimeWall;
  const TimeMin   = min reduce TimeWall;

  // Population standard deviation of the per-iteration times.
  var TimeDiffSquared = 0.0;

  for i in 1..NumIter do {
    TimeDiffSquared += ( TimeWall ( i ) - TimeMean )**2;
  }

  const TimeStdDev = sqrt ( TimeDiffSquared / NumIter );

  // Performance stats (bandwidth):
  //
  // The shortest time gives the highest bandwidth, hence PerfMax uses TimeMin.
  // NOTE(review): every kernel is charged NumVectors arrays of traffic, although COPY and
  // SCALE only touch two arrays -- confirm whether that is intended.

  const BytesPerElem = NumVectors * numBytes ( elemType );

  const PerfMax  = BytesPerElem * ( ArrayLength / TimeMin )  / GIGABYTE;
  const PerfMin  = BytesPerElem * ( ArrayLength / TimeMax )  / GIGABYTE;
  const PerfMean = BytesPerElem * ( ArrayLength / TimeMean ) / GIGABYTE;

  // NOTE(review): this multiplies seconds by GB/sec, so the result is not in GB/sec; a
  // relative form (TimeStdDev / TimeMean * PerfMean) may have been intended -- confirm.
  const PerfStdDev = TimeStdDev * PerfMean;

  writeln ( );
  writeln ( " ============================================================================== " );
  writeln ( );
  writeln ( " STREAM ", test, " results" );
  writeln ( );

  if ierr then {
    writeln ( " [", NumThreads, " threads] Test passed." );
  } else {
    writeln ( " [", NumThreads, " threads] Test failed." );
  }

  writeln ( );
  writeln ( " Execution statistics (per iteration): " );
  writeln ( );
  writeln ( " Time (secs): ( Min, Mean, Max ) = (", TimeMin, ", ", TimeMean, ", ", TimeMax, ")" );
  writeln ( );
  writeln ( " StdDev = ", TimeStdDev, " Total : ", TimeTotal );
  writeln ( );
  writeln ( " Performance: GB/sec ( Min, Mean, Max ) = (", PerfMin, ", ", PerfMean, ", ", PerfMax, ")" );
  writeln ( );
  writeln ( " StdDev = ", PerfStdDev );
  writeln ( );
  writeln ( " STREAM ", test, " number is ", PerfMax, " GBytes/sec." );
  writeln ( " ============================================================================== " );
  writeln ( );
}

// =============================================================================================