#include"SimpleMOC_header.h" /* Efficient version of attenuate fluxes which determines the change in angular * flux along a particular track across a fine axial region and tallies the * contribution to the scalar flux in the fine axial region. This function * assumes a quadratic source, which is calculated on the fly using neighboring * source values. * * This version decomposes the work into many for loops for efficient SIMD * instructions and to reduce register pressure. For a more descriptive * (but less effiient) version of the code in terms of the underlying physics, * see alt_attenuate_fluxes which solves the problem in a more naive, * straightforward manner. */ void attenuate_fluxes( Track * track, bool forward, Source * QSR, Input * I_in, Params * params_in, float ds, float mu, float az_weight, AttenuateVars * A ) { Input I = *I_in; Params params = *params_in; // unload attenuate vars float * restrict q0 = A->q0; float * restrict q1 = A->q1; float * restrict q2 = A->q2; float * restrict sigT = A->sigT; float * restrict tau = A->tau; float * restrict sigT2 = A->sigT2; float * restrict expVal = A->expVal; float * restrict reuse = A->reuse; float * restrict flux_integral = A->flux_integral; float * restrict tally = A->tally; float * restrict t1 = A->t1; float * restrict t2 = A->t2; float * restrict t3 = A->t3; float * restrict t4 = A->t4; // compute fine axial interval spacing float dz = I.height / (I.fai * I.decomp_assemblies_ax * I.cai); // compute z height in cell float zin = track->z_height - dz * ( (int)( track->z_height / dz ) + 0.5f ); // compute fine axial region ID int fine_id = (int) ( track->z_height / dz ) % I.fai; // compute weight (azimuthal * polar) // NOTE: real app would also have volume weight component float weight = track->p_weight * az_weight; float mu2 = mu * mu; // load fine source region flux vector float * FSR_flux = QSR -> fine_flux[fine_id]; if( fine_id == 0 ) { // adjust z height to account for edge zin -= dz; // cycle over energy groups #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { // load neighboring sources float y1 = QSR->fine_source[fine_id][g]; float y2 = QSR->fine_source[fine_id+1][g]; float y3 = QSR->fine_source[fine_id+2][g]; // do quadratic "fitting" float c0 = y2; float c1 = (y1 - y3) / (2.f*dz); float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz); // calculate q0, q1, q2 q0[g] = c0 + c1*zin + c2*zin*zin; q1[g] = c1 + 2.f*c2*zin; q2[g] = c2; } } else if ( fine_id == I.fai - 1 ) { // adjust z height to account for edge zin += dz; // cycle over energy groups #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { // load neighboring sources float y1 = QSR->fine_source[fine_id-2][g]; float y2 = QSR->fine_source[fine_id-1][g]; float y3 = QSR->fine_source[fine_id][g]; // do quadratic "fitting" float c0 = y2; float c1 = (y1 - y3) / (2.f*dz); float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz); // calculate q0, q1, q2 q0[g] = c0 + c1*zin + c2*zin*zin; q1[g] = c1 + 2.f*c2*zin; q2[g] = c2; } } else { // cycle over energy groups #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { // load neighboring sources float y1 = QSR->fine_source[fine_id-1][g]; float y2 = QSR->fine_source[fine_id][g]; float y3 = QSR->fine_source[fine_id+1][g]; // do quadratic "fitting" float c0 = y2; float c1 = (y1 - y3) / (2.f*dz); float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz); // calculate q0, q1, q2 q0[g] = c0 + c1*zin + c2*zin*zin; q1[g] = c1 + 2.f*c2*zin; q2[g] = c2; } } // cycle over energy groups #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { // load total cross section sigT[g] = QSR->sigT[g]; // calculate common values for efficiency tau[g] = sigT[g] * ds; sigT2[g] = sigT[g] * sigT[g]; } // cycle over energy groups #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) expVal[g] = interpolateTable( params.expTable, tau[g] ); // Flux Integral // Re-used Term #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { reuse[g] = tau[g] * (tau[g] - 2.f) + 2.f * expVal[g] / (sigT[g] * sigT2[g]); } float * psi; if(forward) psi = track->f_psi; else psi = track->b_psi; //#pragma vector nontemporal #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { // add contribution to new source flux flux_integral[g] = (q0[g] * tau[g] + (sigT[g] * psi[g] - q0[g]) * expVal[g]) / sigT2[g] + q1[g] * mu * reuse[g] + q2[g] * mu2 * (tau[g] * (tau[g] * (tau[g] - 3.f) + 6.f) - 6.f * expVal[g]) / (3.f * sigT2[g] * sigT2[g]); } #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { // Prepare tally tally[g] = weight * flux_integral[g]; } #ifdef OPENMP omp_set_lock(QSR->locks + fine_id); #endif #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { FSR_flux[g] += tally[g]; } #ifdef OPENMP omp_unset_lock(QSR->locks + fine_id); #endif // Term 1 #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { t1[g] = q0[g] * expVal[g] / sigT[g]; } // Term 2 #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { t2[g] = q1[g] * mu * (tau[g] - expVal[g]) / sigT2[g]; } // Term 3 #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { t3[g] = q2[g] * mu2 * reuse[g]; } // Term 4 #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { t4[g] = psi[g] * (1.f - expVal[g]); } // Total psi #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I.n_egroups; g++) { psi[g] = t1[g] + t2[g] + t3[g] + t4[g]; } } // single direction transport sweep void transport_sweep( Params * params, Input * I ) { if(I->mype==0) printf("Starting transport sweep ...\n"); // calculate the height of a node's domain and of each FSR double node_delta_z = I->height / I->decomp_assemblies_ax; double fine_delta_z = node_delta_z / (I->cai * I->fai); /* loop over tracks (implicitly azimuthal angles, tracks in azimuthal * angles, polar angles, and z stacked rays) */ //print_Input_struct( I ); long segments_processed = 0; #pragma omp parallel default(none) \ shared( I, params, node_delta_z, fine_delta_z ) \ reduction(+ : segments_processed ) { #ifdef OPENMP int thread = omp_get_thread_num(); int nthreads = omp_get_num_threads(); unsigned int seed = time(NULL) * (thread+1); #endif //print_Input_struct( I ); #ifdef PAPI int eventset = PAPI_NULL; int num_papi_events; #pragma omp critical { counter_init(&eventset, &num_papi_events, I); } #endif AttenuateVars A; float * ptr = (float * ) malloc( I->n_egroups * 14 * sizeof(float)); A.q0 = ptr; ptr += I->n_egroups; A.q1 = ptr; ptr += I->n_egroups; A.q2 = ptr; ptr += I->n_egroups; A.sigT = ptr; ptr += I->n_egroups; A.tau = ptr; ptr += I->n_egroups; A.sigT2 = ptr; ptr += I->n_egroups; A.expVal = ptr; ptr += I->n_egroups; A.reuse = ptr; ptr += I->n_egroups; A.flux_integral = ptr; ptr += I->n_egroups; A.tally = ptr; ptr += I->n_egroups; A.t1 = ptr; ptr += I->n_egroups; A.t2 = ptr; ptr += I->n_egroups; A.t3 = ptr; ptr += I->n_egroups; A.t4 = ptr; #pragma omp for schedule( dynamic ) for (long i = 0; i < I->ntracks_2D; i++) { #if TIMING_INFO | 0 // print progress #ifdef OPENMP if(I->mype==0 && thread == 0) { printf("\rAttenuating Tracks... (%.0lf%% completed)", (i / ( (double)I->ntracks_2D / (double) nthreads )) / (double) nthreads * 100.0); } #else if( i % 50 == 0) if(I->mype==0) printf("%s%ld%s%ld\n","2D Tracks Completed = ", i," / ", I->ntracks_2D ); #endif #endif // treat positive-z traveling rays first bool pos_z_dir = true; for( int j = 0; j < I->n_polar_angles; j++) { if( j == I->n_polar_angles / 2 ) pos_z_dir = false; float p_angle = params->polar_angles[j]; float mu = cos(p_angle); // start with all z stacked rays int begin_stacked = 0; int end_stacked = I->z_stacked; for( int n = 0; n < params->tracks_2D[i].n_segments; n++) { // calculate distance traveled in cell if segment completed float s_full = params->tracks_2D[i].segments[n].length / sin(p_angle); // allocate varaible for distance traveled in an FSR float ds = 0; // loop over remaining z-stacked rays for( int k = begin_stacked; k < end_stacked; k++) { // initialize s to full length float s = s_full; // select current track Track * track = ¶ms->tracks[i][j][k]; // set flag for completeion of segment bool seg_complete = false; // calculate interval int curr_interval; if( pos_z_dir) curr_interval = get_pos_interval(track->z_height, fine_delta_z); else curr_interval = get_neg_interval(track->z_height, fine_delta_z); while( !seg_complete ) { // flag to reset z position bool reset = false; /* calculate new height based on s * (distance traveled in FSR) */ float z = track->z_height + s * cos(p_angle); // check if still in same FSR (fine axial interval) int new_interval; if( pos_z_dir ) new_interval = get_pos_interval(z, fine_delta_z); else new_interval = get_neg_interval(z, fine_delta_z); if( new_interval == curr_interval ) { seg_complete = true; ds = s; } // otherwise, we need to recalculate distances else { // correct z if( pos_z_dir ) { curr_interval++; z = fine_delta_z * (float) curr_interval; } else{ curr_interval--; z = fine_delta_z * (float) curr_interval; } // calculate distance travelled in FSR (ds) ds = (z - track->z_height) / cos(p_angle); // update track length remaining s -= ds; /* check remaining track length to protect * against potential roundoff errors */ if( s <= 0 ) seg_complete = true; // check if out of bounds or track complete if( z <= 0 || z >= node_delta_z ) { // mark segment as completed seg_complete = true; // remember to no longer treat this track if ( pos_z_dir ) end_stacked--; else begin_stacked++; // reset z height reset = true; } } // pick a random FSR (cache miss expected) #ifdef OPENMP long QSR_id = rand_r(&seed) % I->n_source_regions_per_node; #else long QSR_id = rand() % I->n_source_regions_per_node; #endif /* update sources and fluxes from attenuation * over FSR */ if( I->axial_exp == 2 ) { attenuate_fluxes( track, true, ¶ms->sources[QSR_id], I, params, ds, mu, params->tracks_2D[i].az_weight, &A ); segments_processed++; } else if( I->axial_exp == 0 ) { attenuate_FSR_fluxes( track, true, ¶ms->sources[QSR_id], I, params, ds, mu, params->tracks_2D[i].az_weight, &A ); segments_processed++; } else { printf("Error: invalid axial expansion order"); printf("\n Please input 0 or 2\n"); exit(1); } // update with new z height or reset if finished if( n == params->tracks_2D[i].n_segments - 1 || reset) { if( pos_z_dir) track->z_height = I->axial_z_sep * k; else track->z_height = I->axial_z_sep * (k+1); } else track->z_height = z; } } } } } #ifdef OPENMP if(thread == 0 && I->mype==0) printf("\n"); #endif #ifdef PAPI if( thread == 0 ) { printf("\n"); border_print(); center_print("PAPI COUNTER RESULTS", 79); border_print(); printf("Count \tSmybol \tDescription\n"); } { #pragma omp barrier } counter_stop(&eventset, num_papi_events, I); #endif } I->segments_processed = segments_processed; return; } // run one full transport sweep, return k void two_way_transport_sweep( Params * params, Input * I ) { if(I->mype==0) printf("Starting transport sweep ...\n"); // calculate the height of a node's domain and of each FSR double node_delta_z = I->height / I->decomp_assemblies_ax; int num_intervals = (I->cai * I->fai); double fine_delta_z = node_delta_z / num_intervals; /* loop over tracks (implicitly azimuthal angles, tracks in azimuthal * angles, polar angles, and z stacked rays) */ long segments_processed = 0; #pragma omp parallel default(none) \ shared( I, params, node_delta_z, fine_delta_z, num_intervals ) \ reduction(+ : segments_processed ) { #ifdef OPENMP int thread = omp_get_thread_num(); int nthreads = omp_get_num_threads(); unsigned int seed = time(NULL) * (thread+1); #endif //print_Input_struct( I ); #ifdef PAPI int eventset = PAPI_NULL; int num_papi_events; #pragma omp critical { counter_init(&eventset, &num_papi_events, I); } #endif AttenuateVars A; float * ptr = (float * ) malloc( I->n_egroups * 14 * sizeof(float)); A.q0 = ptr; ptr += I->n_egroups; A.q1 = ptr; ptr += I->n_egroups; A.q2 = ptr; ptr += I->n_egroups; A.sigT = ptr; ptr += I->n_egroups; A.tau = ptr; ptr += I->n_egroups; A.sigT2 = ptr; ptr += I->n_egroups; A.expVal = ptr; ptr += I->n_egroups; A.reuse = ptr; ptr += I->n_egroups; A.flux_integral = ptr; ptr += I->n_egroups; A.tally = ptr; ptr += I->n_egroups; A.t1 = ptr; ptr += I->n_egroups; A.t2 = ptr; ptr += I->n_egroups; A.t3 = ptr; ptr += I->n_egroups; A.t4 = ptr; #pragma omp for schedule( dynamic ) for (long i = 0; i < I->ntracks_2D; i++) { // print progress #ifdef OPENMP if(I->mype==0 && thread == 0) { printf("\rAttenuating Tracks... (%.0lf%% completed)", (i / ( (double)I->ntracks_2D / (double) nthreads )) / (double) nthreads * 100.0); } #else if( i % 50 == 0) if(I->mype==0) printf("%s%ld%s%ld\n","2D Tracks Completed = ", i," / ", I->ntracks_2D ); #endif // allocate arrays for segment storage FIXME double ** seg_dist = malloc( I->z_stacked * sizeof(double *) ); Source *** seg_src = malloc( I->z_stacked * sizeof(Source**) ); int * seg_idx = malloc( I->z_stacked * sizeof(int) ); int * seg_size = malloc( I->z_stacked * sizeof(int) ); // fill matrix with arrays FIXME for( int k = 0; k < I->z_stacked; k++) { seg_size[k] = 2 * I->segments_per_track; seg_dist[k] = malloc( seg_size[k] * sizeof(double) ); seg_src[k] = malloc( seg_size[k] * sizeof(Source *) ); seg_idx[k] = 0; } // treat positive-z traveling rays first bool pos_z_dir = true; for( int j = 0; j < I->n_polar_angles; j++) { if( j == I->n_polar_angles / 2 ) pos_z_dir = false; float p_angle = params->polar_angles[j]; float mu = cos(p_angle); // start with all z stacked rays int begin_stacked = 0; int end_stacked = I->z_stacked; // reset semgnet indexes for( int k = 0; k < I->z_stacked; k++) seg_idx[k] = 0; for( int n = 0; n < params->tracks_2D[i].n_segments; n++) { // calculate distance traveled in cell if segment completed float s_full = params->tracks_2D[i].segments[n].length / sin(p_angle); // allocate varaible for distance traveled in an FSR float ds = 0; // loop over remaining z-stacked rays int tracks_completed = 0; for( int k = begin_stacked; k < end_stacked; k++) { // select current track Track * track = ¶ms->tracks[i][j][k]; // determine current axial interval int interval = (int) track->z_height / fine_delta_z; // calculate distance to domain boundary float bound_dist; if( pos_z_dir) bound_dist = (node_delta_z - track->z_height) / mu; else bound_dist = -track->z_height / mu; // determine track length float s; if( s_full < bound_dist ) s = s_full; else { // note completion of track s = bound_dist; tracks_completed++; } // set flag for completeion of segment bool seg_complete = false; while( !seg_complete ) { // initialize tracking variables long QSR_id = interval + num_intervals * n; float ds; float z; // calculate z height of next fine axial interval float fai_z_height; if( pos_z_dir ) fai_z_height = (interval + 1) * fine_delta_z ; else fai_z_height = interval * fine_delta_z; // calculate z distance to next fine axial interval float z_dist_to_fai = fai_z_height - track->z_height; /* calculate total distance (s) to fine axial * interval */ float s_dist_to_fai = z_dist_to_fai / mu; // determine if a fine axial interval is crossed if( s_dist_to_fai < s ) { if( pos_z_dir ) interval++; else interval--; ds = s_dist_to_fai; z = track->z_height + z_dist_to_fai; } else { ds = s; z = track->z_height + s * mu; } /* shorten remaining segment length and check if * completed (accounting for potential roundoff) */ s -= ds; if( s <= 0 || interval < 0 || interval >= num_intervals) seg_complete = true; // pick a random FSR (cache miss expected) #ifdef OPENMP QSR_id = rand_r(&seed) % I->n_source_regions_per_node; #else QSR_id = rand() % I->n_source_regions_per_node; #endif /* update sources and fluxes from attenuation * over FSR */ if( I->axial_exp == 2 ) { attenuate_fluxes( track, true, ¶ms->sources[QSR_id], I, params, ds, mu, params->tracks_2D[i].az_weight, &A ); segments_processed++; } else if( I->axial_exp == 0 ) attenuate_FSR_fluxes( track, true, ¶ms->sources[QSR_id], I, params, ds, mu, params->tracks_2D[i].az_weight, &A ); else { printf("Error: invalid axial expansion order"); printf("\n Please input 0 or 2\n"); exit(1); } // update track height track->z_height = z; // save segment length and source FIXME seg_dist[k][seg_idx[k]] = ds; seg_src[k][seg_idx[k]] = ¶ms->sources[QSR_id]; seg_idx[k]++; // check if array needs to grow FIXME if( seg_idx[k] >= seg_size[k] ) { seg_size[k] *= 2; seg_dist[k] = (double *) realloc( seg_dist[k], seg_size[k] * sizeof(double) ); seg_src[k] = (Source **) realloc( seg_src[k], seg_size[k] * sizeof(Source *) ); } } } if(pos_z_dir) end_stacked -= tracks_completed; else begin_stacked += tracks_completed; } // loop over all z stacked rays again for( int k = 0; k < I->z_stacked; k++ ) { for( int n = seg_idx[k]-1; n >= 0; n--) { // load distance float ds = seg_dist[k][n]; // select current track Track * track = ¶ms->tracks[i][j][k]; // update sources and fluxes from attenuation over FSR if( I->axial_exp == 2 ) { attenuate_fluxes( track, false, seg_src[k][n], I, params, ds, -mu, params->tracks_2D[i].az_weight, &A ); segments_processed++; } else if( I->axial_exp == 0 ) attenuate_FSR_fluxes( track, false, seg_src[k][n], I, params, ds, -mu, params->tracks_2D[i].az_weight, &A ); // update z height track->z_height -= ds * mu; } } /* Update all tracks with correct starting z location again * NOTE: this is only here to acocunt for roundoff error */ for( int k = 0; k < I->z_stacked; k++) { Track * track = ¶ms->tracks[i][j][k]; if( pos_z_dir) track->z_height = I->axial_z_sep * k; else track->z_height = I->axial_z_sep * (k+1); } } // free memory for( int k = 0; k < I->z_stacked; k++) { free(seg_dist[k]); free(seg_src[k]); } free(seg_dist); free(seg_src); free(seg_idx); free(seg_size); } #ifdef OPENMP if(thread == 0 && I->mype==0) printf("\n"); #endif #ifdef PAPI if( thread == 0 ) { printf("\n"); border_print(); center_print("PAPI COUNTER RESULTS", 79); border_print(); printf("Count \tSmybol \tDescription\n"); } { #pragma omp barrier } counter_stop(&eventset, num_papi_events, I); #endif } //printf("Number of segments processed: %ld\n", segments_processed); I->segments_processed = segments_processed; return; } /* returns integer number for axial interval for tracks traveling in the * positive direction */ int get_pos_interval( float z, float dz) { int interval = (int) (z/dz); return interval; } /* returns integer number for axial interval for tracks traveling in the * negative direction */ int get_neg_interval( float z, float dz) { // NOTE: a bit of trickery using floors to obtain ceils int interval = INT_MAX - (int) ( (double) INT_MAX - (double) ( z / dz ) ); return interval; } int calc_next_fai( float z, float dz, bool pos_dir) { int interval = z/dz; float lower_z = dz * (float) interval; if(pos_dir) return interval + 1; else return interval; } /* Determines the change in angular flux along a particular track across a fine * axial region and tallies the contribution to the scalar flux in the fine * axial region. This function assumes a quadratic source, which is calculated * on the fly using neighboring source values. * * This legacy function is unused since it is less efficient than the current * attenuate_fluxes function. However, it provides a more straightforward * description of the underlying physical problem. */ void alt_attenuate_fluxes( Track * track, bool forward, Source * QSR, Input * I, Params * params, float ds, float mu, float az_weight ) { // compute fine axial interval spacing float dz = I->height / (I->fai * I->decomp_assemblies_ax * I->cai); // compute z height in cell float zin = track->z_height - dz * ( (int)( track->z_height / dz ) + 0.5 ); // compute fine axial region ID int fine_id = (int) ( track->z_height / dz ) % I->fai; // compute weight (azimuthal * polar) // NOTE: real app would also have volume weight component float weight = track->p_weight * az_weight; float mu2 = mu * mu; // load fine source region flux vector float * FSR_flux = QSR -> fine_flux[fine_id]; // cycle over energy groups for( int g = 0; g < I->n_egroups; g++) { // load total cross section float sigT = QSR->sigT[g]; // define source parameters float q0, q1, q2; // calculate source components if( fine_id == 0 ) { // load neighboring sources float y2 = QSR->fine_source[fine_id][g]; float y3 = QSR->fine_source[fine_id+1][g]; // do linear "fitting" float c0 = y2; float c1 = (y3 - y2) / dz; // calculate q0, q1, q2 q0 = c0 + c1*zin; q1 = c1; q2 = 0; } else if( fine_id == I->fai - 1 ) { // load neighboring sources float y1 = QSR->fine_source[fine_id-1][g]; float y2 = QSR->fine_source[fine_id][g]; // do linear "fitting" float c0 = y2; float c1 = (y2 - y1) / dz; // calculate q0, q1, q2 q0 = c0 + c1*zin; q1 = c1; q2 = 0; } else { // load neighboring sources float y1 = QSR->fine_source[fine_id-1][g]; float y2 = QSR->fine_source[fine_id][g]; float y3 = QSR->fine_source[fine_id+1][g]; // do quadratic "fitting" float c0 = y2; float c1 = (y1 - y3) / (2*dz); float c2 = (y1 - 2*y2 + y3) / (2*dz*dz); // calculate q0, q1, q2 q0 = c0 + c1*zin + c2*zin*zin; q1 = c1 + 2*c2*zin; q2 = c2; } // calculate common values for efficiency float tau = sigT * ds; float sigT2 = sigT * sigT; // compute exponential ( 1 - exp(-x) ) using table lookup float expVal = interpolateTable( params->expTable, tau ); // load correct angular flux vector float * psi; if(forward) psi = track->f_psi; else psi = track->b_psi; // add contribution to new source flux float flux_integral = (q0 * tau + (sigT * psi[g] - q0) * expVal) / sigT2 + q1 * mu * (tau * (tau - 2) + 2 * expVal) / (sigT * sigT2) + q2 * mu2 * (tau * (tau * (tau - 3) + 6) - 6 * expVal) / (3 * sigT2 * sigT2); #pragma omp atomic FSR_flux[g] += weight * flux_integral; // update angular flux psi[g] = psi[g] * (1.0 - expVal) + q0 * expVal / sigT + q1 * mu * (tau - expVal) / sigT2 + q2 * mu2 * (tau * (tau - 2) + 2 * expVal) / (sigT2 * sigT); } } /* Determines the change in angular flux along a particular track across a fine * axial region and tallies the contribution to the scalar flux in the fine * axial region. This function assumes a constant source. */ void attenuate_FSR_fluxes( Track * track, bool forward, Source * FSR, Input * I, Params * params_in, float ds, float mu, float az_weight, AttenuateVars *A) { // upack attenuate vars struct float * restrict tally = A->tally; float * restrict expVal = A->expVal; float * restrict sigT = A->sigT; float * restrict tau = A->tau; Params params = * params_in; // compute fine axial interval spacing float dz = I->height / (I->fai * I->decomp_assemblies_ax * I->cai); // compute z height in cell float zin = track->z_height - dz * ( (int)( track->z_height / dz ) + 0.5f ); // compute fine axial region ID int fine_id = (int) ( track->z_height / dz ) % I->fai; // compute weight (azimuthal * polar) // NOTE: real app would also have volume weight component float weight = track->p_weight * az_weight * mu; // load fine source region flux vector float * FSR_flux = FSR -> fine_flux[fine_id]; // cycle over energy groups #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I->n_egroups; g++) { // load total cross section sigT[g] = FSR->sigT[g]; tau[g] = sigT[g] * ds; } // compute exponential ( 1 - exp(-x) ) using table lookup #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for(int g = 0; g < I->n_egroups; g++) { expVal[g] = interpolateTable( params.expTable, tau[g] ); } float * psi; if(forward) psi = track->f_psi; else psi = track->b_psi; #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I->n_egroups; g++) { // compute angular flux attenuation float q = FSR->fine_source[fine_id][g] / sigT[g]; float delta_psi = (psi[g] - q) * expVal[g]; // add contribution to new source flux tally[g] = weight * delta_psi; // update angular flux psi[g] -= delta_psi; } #ifdef OPENMP omp_set_lock(&FSR->locks[fine_id]); #endif #ifdef INTEL #pragma simd #elif defined IBM #pragma simd_level(10) #endif for( int g = 0; g < I->n_egroups; g++) { FSR_flux[g] += tally[g]; } #ifdef OPENMP omp_unset_lock(&FSR->locks[fine_id]); #endif } /* Renormalizes scalar and angular flux for next transport sweep iteration. * Calculation requires multiple pair-wise sums and a reduction accross all * nodes. */ void renormalize_flux( Params params, Input I, CommGrid grid ) { if( I.mype == 0 ) printf("Renormalizing Flux...\n"); float node_fission_rate = 0; #ifdef OPENMP #pragma omp parallel default(none) shared(params, I, grid) \ reduction(+ : node_fission_rate) { #endif // tally total fission rate (pair-wise sum) float * fission_rates = malloc( I.n_source_regions_per_node * sizeof(float) ); float * fine_fission_rates = malloc( I.fai * sizeof(float) ); float * g_fission_rates = malloc( I.n_egroups * sizeof(float) ); // accumulate total fission rate on node domain #pragma omp for schedule(dynamic) for( int i = 0; i < I.n_source_regions_per_node; i++) { Source src = params.sources[i]; for( int j = 0; j < I.fai; j++) { for( int g = 0; g < I.n_egroups; g++) g_fission_rates[g] = src.fine_flux[j][g] * src.vol * src.XS[g][0]; fine_fission_rates[j] = pairwise_sum( g_fission_rates, I.n_egroups ); } fission_rates[i] = pairwise_sum( fine_fission_rates, I.fai ); } node_fission_rate = pairwise_sum(fission_rates, I.n_source_regions_per_node); // free allocated memory free(fission_rates); free(fine_fission_rates); free(g_fission_rates); #ifdef OPENMP } #endif #ifdef MPI // accumulate total fission rate by MPI Allreduce float total_fission_rate = 0; MPI_Barrier(grid.cart_comm_3d); MPI_Allreduce( &node_fission_rate, // Send Buffer &total_fission_rate, // Receive Buffer 1, // Element Count MPI_FLOAT, // Element Type MPI_SUM, // Reduciton Operation Type grid.cart_comm_3d ); // MPI Communicator MPI_Barrier(grid.cart_comm_3d); #else float total_fission_rate = node_fission_rate; #endif // normalize fluxes by fission reaction rate float norm_factor = 1.0 / total_fission_rate; #pragma omp parallel for default(none) \ shared(I, params) private(norm_factor) schedule(dynamic) for( int i = 0; i < I.n_source_regions_per_node; i++) { Source * src = ¶ms.sources[i]; float adjust = norm_factor * 4 * M_PI * I.fai / src->vol; for( int k = 0; k < I.fai; k++) for( int g = 0; g < I.n_egroups; g++) src->fine_flux[k][g] *= adjust; } // normalize boundary fluxes by same factor #pragma omp parallel for default(none) \ shared(I, params) private(norm_factor) schedule(dynamic) for( long i = 0; i < I.ntracks_2D; i++) for( int j = 0; j < I.n_polar_angles; j++) for( int k = 0; k < I.z_stacked; k++) for( int g = 0; g < I.n_egroups; g++) { params.tracks[i][j][k].f_psi[g] *= norm_factor; params.tracks[i][j][k].b_psi[g] *= norm_factor; } if( I.mype == 0 ) printf("Renormalizing Flux Complete.\n"); return; } /* Updates sources for next iteration by computing scattering and fission * components. Calculation includes multiple pair-wise sums and reductions * accross all nodes */ float update_sources( Params params, Input I, float keff ) { // source residual float residual; // calculate inverse multiplication facotr for efficiency float inverse_k = 1.0 / keff; // allocate residual arrays float * group_res = (float *) malloc(I.n_egroups * sizeof(float)); float * fine_res = (float *) malloc(I.fai * sizeof(float)); float * residuals = (float *) malloc(I.n_source_regions_per_node * sizeof(float)); // allocate arrays for summation float * fission_rates = malloc(I.n_egroups * sizeof(float)); float * scatter_rates = malloc(I.n_egroups * sizeof(float)); // cycle through all coarse axial intervals to update source for( long i = 0; i < I.n_source_regions_per_node; i++) { Source src = params.sources[i]; // cycle thorugh all fine axial regions to calculate new source for( int j = 0; j < I.fai; j++) { // calculate total fission source and scattering source float fission_source; float scatter_source; // compute total fission source for( int g = 0; g < I.n_egroups; g++ ) fission_rates[g] = src.fine_flux[j][g] * src.XS[g][0]; fission_source = pairwise_sum( fission_rates, (long) I.n_egroups); // normalize fission source by multiplication factor fission_source *= inverse_k; // compute scattering and new total source for each group for( int g = 0; g < I.n_egroups; g++ ) { for( int g2 = 0; g2 < I.n_egroups; g2++ ) { // compute scatter source originating from g2 -> g scatter_rates[g2] = src.scattering_matrix[g][g2] * src.fine_flux[j][g2]; } scatter_source = pairwise_sum(scatter_rates, (long) I.n_egroups); // compuate new total source float chi = src.XS[g][2]; // calculate new fine source float newSrc = (fission_source * chi + scatter_source) / (4.0 * M_PI); // calculate residual float oldSrc = src.fine_source[j][g]; group_res[g] = (newSrc - oldSrc) * (newSrc - oldSrc) / (oldSrc * oldSrc); /* calculate new source in fine axial interval assuming * isotropic source components */ src.fine_source[j][g] = newSrc; } fine_res[j] = pairwise_sum(group_res, (long) I.n_egroups); } residuals[i] = pairwise_sum(fine_res, (long) I.fai); } // calculate source residual residual = pairwise_sum(residuals, I.n_source_regions_per_node); // free memory free(fission_rates); free(scatter_rates); free(group_res); free(fine_res); free(residuals); // NOTE: See code around line 600 of CPUSolver.cpp in ClosedMOC/ OpenMOC return residual; } /* Computes globall k-effective using multiple pair-wise summations and finally * a reduction accross all nodes */ float compute_keff(Params params, Input I, CommGrid grid) { // allocate temporary memory float * sigma = malloc( I.n_egroups * sizeof(float) ); float * group_rates = malloc( I.n_egroups * sizeof(float) ); float * fine_rates = malloc( I.fai * sizeof(float) ); float * QSR_rates = malloc( I.n_source_regions_per_node * sizeof(float) ); /////////////////////////////////////////////////////////////////////////// // compute total absorption rate, looping over source regions for( long i = 0; i < I.n_source_regions_per_node; i++) { // load absorption XS data Source src = params.sources[i]; for( int g = 0; g < I.n_egroups; g++) sigma[g] = src.XS[g][1]; for( int j = 0; j < I.fai; j++ ) { // calculate absorption rates float * fine_flux = src.fine_flux[j]; for( int g = 0; g < I.n_egroups; g++) group_rates[g] = sigma[g] * fine_flux[g]; // sum absorption over all energy groups fine_rates[j] = pairwise_sum( group_rates, (long) I.n_egroups ); } // sum absorption over all fine axial intervals QSR_rates[i] = pairwise_sum( fine_rates, (long) I.fai ); } // sum absorption over all source regions in a node float node_abs = pairwise_sum( QSR_rates, I.n_source_regions_per_node); /////////////////////////////////////////////////////////////////////////// // compute total absorption rate, looping over source regions for( long i = 0; i < I.n_source_regions_per_node; i++) { // load nuSigmaF XS data Source src = params.sources[i]; for( int g = 0; g < I.n_egroups; g++) sigma[g] = src.XS[g][0]; for( int j = 0; j < I.fai; j++ ) { // calculate absorption rates float * fine_flux = src.fine_flux[j]; for( int g = 0; g < I.n_egroups; g++) group_rates[g] = sigma[g] * fine_flux[g]; // sum fission over all energy groups fine_rates[j] = pairwise_sum( group_rates, (long) I.n_egroups ); } // sum fission over all fine axial intervals QSR_rates[i] = pairwise_sum( fine_rates, (long) I.fai ); } // sum fission over all source regions in a node float node_fission = pairwise_sum( QSR_rates, I.n_source_regions_per_node); /////////////////////////////////////////////////////////////////////////// // MPi Reduction float tot_abs = 0; float tot_fission = 0; float leakage = 0; #ifdef MPI // Total Absorption Reduction MPI_Reduce( &node_abs, // Send Buffer &tot_abs, // Receive Buffer 1, // Element Count MPI_FLOAT, // Element Type MPI_SUM, // Reduciton Operation Type 0, // Master Rank grid.cart_comm_3d ); // MPI Communicator // Total Fission Reduction MPI_Reduce( &node_fission, // Send Buffer &tot_fission, // Receive Buffer 1, // Element Count MPI_FLOAT, // Element Type MPI_SUM, // Reduciton Operation Type 0, // Master Rank grid.cart_comm_3d ); // MPI Communicator // Total Leakage Reduction MPI_Reduce( params.leakage, // Send Buffer &leakage, // Receive Buffer 1, // Element Count MPI_FLOAT, // Element Type MPI_SUM, // Reduciton Operation Type 0, // Master Rank grid.cart_comm_3d ); // MPI Communicator MPI_Barrier(grid.cart_comm_3d); // calculate keff float keff = tot_fission/ (tot_abs + leakage); #else float keff = node_fission / (node_abs + *params.leakage); #endif /////////////////////////////////////////////////////////////////////////// // free memory free(sigma); free(group_rates); free(fine_rates); free(QSR_rates); return keff; } /* Interpolates a formed exponential table to compute ( 1- exp(-x) ) * at the desired x value */ float interpolateTable( Table table, float x) { // check to ensure value is in domain if( x > table.maxVal ) return 1.0f; else { int interval = (int) ( x / table.dx + 0.5f * table.dx ); /* if( interval >= table.N || interval < 0) { printf( "Interval = %d\n", interval); printf( "N = %d\n", table.N); printf( "x = %f\n", x); printf( "dx = %f\n", table.dx); exit(1); } */ float slope = table.values[ 2 * interval ]; float intercept = table.values[ 2 * interval + 1 ]; float val = slope * x + intercept; return val; } }