/*
 * Copyright (C) 2013, Northwestern University
 * Please email questions to Wei-keng Liao
 *
 * This program reports the I/O performance of writing 3D arrays of integer
 * type using the PnetCDF and HDF5 methods underneath. The program uses
 * netCDF-4 APIs to write 3D arrays in parallel to a shared file. The global
 * 3D array is partitioned among processes in a block-block-block fashion
 * along the X, Y, and Z dimensions.
 *
 * To choose the PnetCDF or HDF5 method to carry out parallel I/O underneath,
 * one can simply add the following file create mode:
 *     NC_MPIIO              - for using the PnetCDF method
 *     NC_MPIIO | NC_NETCDF4 - for using the HDF5 method
 * Note that using NC_MPIIO is no longer required, as it has been deprecated
 * since NetCDF 4.6.2.
 *
 * Compile and run commands are given below.
 *
 * 1. At the time this program was developed, the following library versions
 *    were used:
 *        HDF5    version 1.8.10
 *        netCDF  version 4.4.0
 *        PnetCDF version 1.4.0
 *
 * 2. To build netCDF, the following configure options were used:
 *        ./configure --prefix=/usr/local \
 *                    --disable-shared \
 *                    --enable-netcdf-4 \
 *                    --enable-pnetcdf \
 *                    FC=mpif90 CXX=mpicxx CC=mpicc
 *
 * 3. To compile and link this example program:
 *        mpicc -O2 -o coll_perf_nc4 coll_perf_nc4.c \
 *              -I/path/PnetCDF/include -I/path/netCDF/include -I/path/HDF5/include \
 *              -L/path/PnetCDF/lib -L/path/netCDF/lib -L/path/HDF5/lib \
 *              -lnetcdf -lhdf5_hl -lhdf5 -lpnetcdf -lz -lcurl -ldl -lm
 *
 * 4. Run command (an example of using 16 MPI processes):
 *        mpiexec -n 16 ./coll_perf_nc4 64 /orangefs/wkliao/testfile
 *
 *        Global array size    = 256 x 256 x 64
 *        MPI process topology = 4 x 4 x 1
 *        Number of variables  = 10
 *        Total variable size  = 160.00 MiB
 *        -- I/O method: PnetCDF --
 *        Max time among all processes = 3.54 sec
 *        Write bandwidth              = 45.16 MiB/sec
 *        -- I/O method: HDF5 --
 *        Max time among all processes = 3.54 sec
 *        Write bandwidth              = 45.16 MiB/sec
 *
 * 5. Output files: two output files (one in CDF-2 format and the other in
 *    HDF5 format) will be created. The output of running the command
 *    "ncdump -h" on the CDF-2 file is given below.
 *
 *        % ncdump -h /orangefs/wkliao/testfile.nc
 *        netcdf testfile {
 *        dimensions:
 *                Z = 256 ;
 *                Y = 256 ;
 *                X = 64 ;
 *        variables:
 *                int var_0(Z, Y, X) ;
 *                int var_1(Z, Y, X) ;
 *                int var_2(Z, Y, X) ;
 *                int var_3(Z, Y, X) ;
 *                int var_4(Z, Y, X) ;
 *                int var_5(Z, Y, X) ;
 *                int var_6(Z, Y, X) ;
 *                int var_7(Z, Y, X) ;
 *                int var_8(Z, Y, X) ;
 *                int var_9(Z, Y, X) ;
 *        }
 *
 *        % ncdump -k /orangefs/wkliao/testfile.nc
 *        64-bit offset
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include <netcdf.h>
#include <netcdf_par.h>

#define ERR {if(err!=NC_NOERR) {printf("Error at line=%d: %s\n", __LINE__, nc_strerror(err)); return 0;}}

#define NDIMS 3
#define NVARS 10
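/*
 * Optional helper, not part of the original program: a minimal sketch showing
 * how one could verify programmatically (for example, from rank 0 after the
 * writes complete) that the two output files are in CDF-2 (64-bit offset) and
 * HDF5 (netCDF-4) formats, similar to running "ncdump -k". The function name
 * report_file_kind is made up for this sketch and is not called by main().
 */
void report_file_kind(const char *path)
{
    int err, ncid, format;

    err = nc_open(path, NC_NOWRITE, &ncid);
    if (err != NC_NOERR) {
        printf("Error: nc_open(%s): %s\n", path, nc_strerror(err));
        return;
    }
    err = nc_inq_format(ncid, &format);
    if (err == NC_NOERR)
        printf("%s: %s\n", path,
               format == NC_FORMAT_64BIT_OFFSET ? "64-bit offset (CDF-2)" :
               format == NC_FORMAT_NETCDF4      ? "netCDF-4 (HDF5)" :
                                                  "other netCDF format");
    nc_close(ncid);
}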
/*----< netcdf4_io() >--------------------------------------------------------*/
int netcdf4_io(int     io_method,
               char   *filename,
               size_t *gsizes,   /* [3] global array size */
               size_t *start,    /* [3] access start offset to the global array */
               size_t *count)    /* [3] access length */
{
    char varname[16];
    int i, j, rank, err, ncid, cmode, *varid, dimid[3], **buf;
    size_t buf_len;
    MPI_Info info=MPI_INFO_NULL;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* decide which I/O method to use.
     * Using NC_MPIIO is no longer required, as it has been deprecated since
     * NetCDF 4.6.2 */
    cmode = NC_CLOBBER;
    if (io_method == 0)
        /* use PnetCDF to carry out I/O and use CDF-2 file format */
        cmode |= NC_MPIIO | NC_64BIT_OFFSET;
    else
        /* use HDF5 plus MPI-IO to carry out I/O */
        cmode |= NC_MPIIO | NC_NETCDF4;

    /* create the file in parallel */
    err = nc_create_par(filename, cmode, MPI_COMM_WORLD, info, &ncid); ERR

    /* free info object */
    if (info != MPI_INFO_NULL) MPI_Info_free(&info);

    /* define dimensions */
    err = nc_def_dim(ncid, "Z", gsizes[0], &dimid[0]); ERR
    err = nc_def_dim(ncid, "Y", gsizes[1], &dimid[1]); ERR
    err = nc_def_dim(ncid, "X", gsizes[2], &dimid[2]); ERR

    /* define 3D variables of integer type */
    varid = (int*) malloc(NVARS * sizeof(int));
    for (i=0; i<NVARS; i++) {
        sprintf(varname, "var_%d", i);
        err = nc_def_var(ncid, varname, NC_INT, NDIMS, dimid, &varid[i]); ERR
    }

    /* exit define mode */
    err = nc_enddef(ncid); ERR

    /* allocate and initialize write buffers, one per variable
     * (the fill value, the MPI rank, is arbitrary benchmark data) */
    buf_len = count[0] * count[1] * count[2];
    buf = (int**) malloc(NVARS * sizeof(int*));
    for (i=0; i<NVARS; i++) {
        buf[i] = (int*) malloc(buf_len * sizeof(int));
        for (j=0; j<(int)buf_len; j++) buf[i][j] = rank;
    }

    /* write each variable's local subarray using collective I/O */
    for (i=0; i<NVARS; i++) {
        err = nc_var_par_access(ncid, varid[i], NC_COLLECTIVE); ERR
        err = nc_put_vara_int(ncid, varid[i], start, count, buf[i]); ERR
    }

    err = nc_close(ncid); ERR

    for (i=0; i<NVARS; i++) free(buf[i]);
    free(buf);
    free(varid);

    return 1;
}

/*----< main() >--------------------------------------------------------------*/
int main(int argc, char* argv[])
{
    char basename[128], filename[128], mpi_name[MPI_MAX_PROCESSOR_NAME];
    int i, err, mpi_namelen, rank, nprocs, rank_d[NDIMS], verbose=0;
    int psizes[NDIMS], len, io_method;
    size_t start[NDIMS], count[NDIMS], gsizes[NDIMS];
    double timing[2], max_t[2];

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(mpi_name,&mpi_namelen);

    if (argc != 3) {
        if (!rank) printf("Usage: %s len filename\n",argv[0]);
        MPI_Finalize();
        return 0;
    }
    if (verbose) printf("rank %2d runs on host %s\n",rank,mpi_name);

    len = atoi(argv[1]);
    strcpy(basename, argv[2]);
    MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(basename, 128, MPI_CHAR, 0, MPI_COMM_WORLD);

    /* create a block-block-block process topology */
    for (i=0; i<NDIMS; i++) psizes[i] = 0;
    MPI_Dims_create(nprocs, NDIMS, psizes);

    /* this process's coordinates in the process topology */
    rank_d[0] =  rank / (psizes[1] * psizes[2]);
    rank_d[1] = (rank /  psizes[2]) % psizes[1];
    rank_d[2] =  rank %  psizes[2];

    /* set the global array sizes and this process's subarray offsets/lengths */
    for (i=0; i<NDIMS; i++) {
        gsizes[i] = (size_t)len * psizes[i];
        start[i]  = (size_t)len * rank_d[i];
        count[i]  = (size_t)len;
    }

    if (rank == 0) {
        printf("Global array size    = %zu x %zu x %zu\n",
               gsizes[0], gsizes[1], gsizes[2]);
        printf("MPI process topology = %d x %d x %d\n",
               psizes[0], psizes[1], psizes[2]);
        printf("Number of variables  = %d\n", NVARS);
        printf("Total variable size  = %.2f MiB\n",
               (double)sizeof(int)*NVARS*gsizes[0]*gsizes[1]*gsizes[2]/1048576.0);
    }

    /* io_method 0 writes a CDF-2 file through PnetCDF, io_method 1 writes an
     * HDF5 file through netCDF-4. The ".nc" suffix matches the sample output
     * in the header comment; the ".nc4" suffix for the HDF5 file is an
     * assumption made here. */
    for (io_method=0; io_method<2; io_method++) {
        sprintf(filename, "%s%s", basename, (io_method == 0) ? ".nc" : ".nc4");

        MPI_Barrier(MPI_COMM_WORLD);
        timing[io_method] = MPI_Wtime();
        err = netcdf4_io(io_method, filename, gsizes, start, count);
        timing[io_method] = MPI_Wtime() - timing[io_method];
        if (err == 0) /* netcdf4_io() has already printed an error message */
            MPI_Abort(MPI_COMM_WORLD, -1);
    }

    /* report the maximum write time among all processes and the bandwidth */
    MPI_Reduce(timing, max_t, 2, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        double size_mib = (double)sizeof(int)*NVARS*gsizes[0]*gsizes[1]*gsizes[2]/1048576.0;
        printf("-- I/O method: PnetCDF --\n");
        printf("Max time among all processes = %.2f sec\n", max_t[0]);
        printf("Write bandwidth              = %.2f MiB/sec\n", size_mib/max_t[0]);
        printf("-- I/O method: HDF5 --\n");
        printf("Max time among all processes = %.2f sec\n", max_t[1]);
        printf("Write bandwidth              = %.2f MiB/sec\n", size_mib/max_t[1]);
    }

    MPI_Finalize();
    return 0;
}