diff --git a/install_hdf5.md b/install_hdf5.md new file mode 100644 index 00000000..74885e4a --- /dev/null +++ b/install_hdf5.md @@ -0,0 +1,17 @@ +Download source from: +https://www.hdfgroup.org/downloads/hdf5/source-code/ + +e.g.: +https://www.hdfgroup.org/package/hdf5-1-10-6-tar-bz2/?wpdmdl=14134&refresh=5e34a41db4c8c1580508189 + +decompress to a folder, e.g., /home/user + +./configure --prefix=/usr/local/hdf5 + +make + +make check # run test suite. + +make install + +make check-install # verify installation. diff --git a/niftypet/nipet/CMakeLists.txt b/niftypet/nipet/CMakeLists.txt index 70b59188..e2a926bf 100644 --- a/niftypet/nipet/CMakeLists.txt +++ b/niftypet/nipet/CMakeLists.txt @@ -34,3 +34,12 @@ install(EXPORT ${PROJECT_NAME}Targets FILE NiftyPET${PROJECT_NAME}Targets.cmake add_subdirectory(lm) add_subdirectory(prj) add_subdirectory(sct) +# HDF5 needed for GE data +set(HDF5_ROOT "/usr/local/hdf5/") +find_package(HDF5) +if(HDF5_FOUND) + message(STATUS "HDF5 include dir: ${HDF5_INCLUDE_DIRS}") + add_subdirectory(lm_sig) +else() + message(WARNING "Could not find HDF5. Not building lm_sig") +endif() diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 25c7e116..c4549c55 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -32,6 +32,8 @@ 'back_prj', 'frwd_prj', 'simulate_recon', 'simulate_sino', # sct 'vsm', + # signa + 'aux_sig', 'lm_sig', 'lminfo_sig', # optional 'video_dyn', 'video_frm', 'xnat'] # yapf: disable from pkg_resources import resource_filename @@ -72,10 +74,8 @@ xnat = None # > GE Signa -# from . import aux_sig - -# from . import lm_sig -# from .lm_sig.hst_sig import lminfo_sig +from . import aux_sig, lm_sig +from .lm_sig.hst_sig import lminfo_sig # for use in `cmake -DCMAKE_PREFIX_PATH=...` cmake_prefix = resource_filename(__name__, "cmake") diff --git a/niftypet/nipet/aux_sig.py b/niftypet/nipet/aux_sig.py new file mode 100755 index 00000000..0b84f3a4 --- /dev/null +++ b/niftypet/nipet/aux_sig.py @@ -0,0 +1,253 @@ +import logging +import os +from math import pi + +import h5py +import numpy as np + +log = logging.getLogger(__name__) + + +def constants_h5(pthfn): + with h5py.File(pthfn, 'r') as f: + # coincidence event mode + cncdmd = f['HeaderData']['AcqParameters']['EDCATParameters']['coinOutputMode'][0] + if cncdmd == 802: + # bytes per event in this mode: + bpe = 6 + log.info("list-mode data in NOMINAL mode (6 bytes per event)") + elif cncdmd == 803: + bpe = 16 + log.error( + "list-mode data in CALIBRATION mode (16 bytes per event) not currently supported") + elif cncdmd == 805: + bpe = 8 + log.error( + "the ist-mode data in ENERGY mode (8 bytes per event) not currently supported") + else: + bpe = 0 + log.error("list-mode data in UNKNOWN mode") + + # toff: scan start time marker (used as offset) + CntH5 = { + 'toff': f['HeaderData']['AcqStats']['frameStartCoincTStamp'][0], + 'Deff': f['HeaderData']['SystemGeometry']['effectiveRingDiameter'][0], + 'TFOV': f['HeaderData']['AcqParameters']['EDCATParameters']['transAxialFOV'][0], + 'cpitch': f['HeaderData']['SystemGeometry']['interCrystalPitch'][0], + 'bpitch': f['HeaderData']['SystemGeometry']['interBlockPitch'][0], + 'exLOR': f['HeaderData']['AcqParameters']['RxScanParameters']['extraRsForTFOV'][0], + 'axCB': f['HeaderData']['SystemGeometry']['axialCrystalsPerBlock'][0], + 'axBU': f['HeaderData']['SystemGeometry']['axialBlocksPerUnit'][0], + 'axUM': f['HeaderData']['SystemGeometry']['axialUnitsPerModule'][0], + 'axMno': f['HeaderData']['SystemGeometry']['axialModulesPerSystem'][0], + 'txCB': f['HeaderData']['SystemGeometry']['radialCrystalsPerBlock'][0], + 'txBU': f['HeaderData']['SystemGeometry']['radialBlocksPerUnit'][0], + 'txUM': f['HeaderData']['SystemGeometry']['radialUnitsPerModule'][0], + 'txMno': f['HeaderData']['SystemGeometry']['radialModulesPerSystem'][0], + 'MRD': f['HeaderData']['AcqParameters']['BackEndAcqFilters']['maxRingDiff'][0], + 'tau0': f['HeaderData']['AcqParameters']['EDCATParameters']['negCoincidenceWindow'][0], + 'tau1': f['HeaderData']['AcqParameters']['EDCATParameters']['posCoincidenceWindow'][0], + 'tauP': f['HeaderData']['AcqParameters']['EDCATParameters']['coincTimingPrecision'][0], + 'TOFC': f['HeaderData']['AcqParameters']['RxScanParameters']['tofCompressionFactor'] + [0], + 'LLD': f['HeaderData']['AcqParameters']['EDCATParameters']['lower_energy_limit'][0], + 'ULD': f['HeaderData']['AcqParameters']['EDCATParameters']['upper_energy_limit'][0], + 'BPE': bpe} + return CntH5 + + +def get_nbins(Cnt): + txUno = Cnt['txUM'] * Cnt['txMno'] + txCU = Cnt['txCB'] * Cnt['txBU'] + cpitch = Cnt['cpitch'] + minValue = np.ceil(2 * (np.arcsin( + (Cnt['TFOV'] * 10.0) / Cnt['Deff']) - np.floor(txUno * np.arcsin( + (Cnt['TFOV'] * 10.0) / Cnt['Deff']) / pi) * pi / txUno) / cpitch) + if txCU < minValue: + minValue = txCU + halfFanLORs = np.floor(txUno * np.arcsin( + (Cnt['TFOV'] * 10.0) / Cnt['Deff']) / pi) * txCU + minValue + W = 2 * int(halfFanLORs) + 2 * Cnt['exLOR'] + 1 + C = Cnt['txCB'] * Cnt['txBU'] * Cnt['txUM'] * Cnt['txMno'] * Cnt['axCB'] + if W > C: + W = C + return W + + +# =================================================================================== +# SCANNER CONSTANTS +def get_sig_constants(pthfn): + + if not os.path.isfile(pthfn): + print('e> coult not open the file HDF5 to get SIGNA constants') + return + + Cnt = constants_h5(pthfn) + + # default logging set to WARNING only (30) + Cnt['LOG'] = 30 + + # number of sinogram angles + NSBINS = get_nbins(Cnt) + # number of transxial crystals + NCRS = Cnt['txCB'] * Cnt['txBU'] * Cnt['txUM'] * Cnt['txMno'] + # number of rings + NRNG = Cnt['axCB'] * Cnt['axBU'] * Cnt['axUM'] * Cnt['axMno'] + # number of 2D sinograms + NSN = NRNG**2 - (NRNG-1) + # bootstrapping of the list-mode data; + # 0: None, 1: Not used (was non-parametric for the mMR), 2: parametric + Cnt['BTP'] = 0 + Cnt['NCRS'] = NCRS + Cnt['NRNG'] = NRNG + # defnie reduced detector ring + Cnt['RNG_END'] = NRNG + Cnt['RNG_STRT'] = 0 + Cnt['NSN'] = NSN + Cnt['NSBINS'] = NSBINS + Cnt['NSANGLES'] = NCRS // 2 + + Cnt['NSEG0'] = 2 * Cnt['axUM'] * Cnt['axCB'] - 1 + + # LM processing + # integration time of 1 sec + Cnt['ITIME'] = 1000 + # number of CUDA streams + Cnt['NSTREAMS'] = 32 + # number of elements per data chunk + # 2^{28} = 268435456 elements (6Bytes) to make up 1.6GB + Cnt['ELECHNK'] = (268435456 // Cnt['NSTREAMS']) + + # projection view integration time (length of the short time frames t = 2^VTIME) + Cnt['VTIME'] = 2 + # the short time frame projection views are limited to 90 mins only + Cnt['MXNITAG'] = 5400 + + # transaxial and axial crystal sizes in cm + Cnt['TXCRS'] = 0.395 + Cnt['AXCRS'] = 0.530 + # gap between axial detector units + Cnt['AXGAP'] = 0.280 + # axial FOV + Cnt['AXFOV'] = (Cnt['axUM'] - 1) * Cnt['AXGAP'] + Cnt['axUM'] * Cnt['axCB'] * Cnt['AXCRS'] + + return Cnt + + +# =================================================================================== +# AXIAL LUTS +def get_axLUT(Cnt): + + # calculated rings + NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] + NSN1_c = NRNG_c**2 - (NRNG_c-1) + + # get the sino LUTs + M = np.zeros((NRNG_c, NRNG_c), dtype=np.int16) + # sino index + Msn = np.zeros((NRNG_c, NRNG_c), dtype=np.int16) + # sino index SIGNA native + Msig = np.zeros((NRNG_c, NRNG_c), dtype=np.int16) + + sn_rno = np.zeros((NRNG_c**2, 2), dtype=np.int16) + + # diagonal linear index (positive only) + dli = 0 + # full diagonal linear index (positive and negative) + sni = 0 + + for ro in range(0, NRNG_c): + if ro == 0: + oblique = 1 + else: + oblique = 2 + for m in range(oblique): + # selects the sub-michelogram of the whole michelogram + strt = Cnt['NRNG'] * (ro + Cnt['RNG_STRT']) + Cnt['RNG_STRT'] + stop = (Cnt['RNG_STRT'] + NRNG_c) * Cnt['NRNG'] + step = Cnt['NRNG'] + 1 + + for li in range(strt, stop, step): + # goes along a diagonal started in the first row at r2o + # from the linear indices of michelogram get the subscript indices + if m == 0: + r0 = li // Cnt['NRNG'] + r1 = li - r0 * Cnt['NRNG'] + M[r1, r0] = dli + dli += 1 + else: + r1 = li // Cnt['NRNG'] + r0 = li - r1 * Cnt['NRNG'] + + Msn[r1, r0] = sni + + sn_rno[sni, 0] = r0 + sn_rno[sni, 1] = r1 + + sni += 1 + + # --- SIGNA native --- + rdiff = r0 - r1 + rsum = r0 + r1 + if (rdiff > 1): + angle = rdiff // 2 + if (angle <= Cnt['MRD'] / 2): + snis = rsum + (4*angle - 2) * Cnt['NRNG'] - (4*angle*angle - 1) + elif (rdiff < -1): + angle = -rdiff // 2 + if (angle <= Cnt['MRD'] // 2): + snis = rsum + (4*angle) * Cnt['NRNG'] - ((angle+1) * 4 * angle) + else: + snis = rsum + Msig[r1, r0] = snis + # ------ + axLUT = {'r2s': Msn, 'r2sig': Msig, 's2r': sn_rno} + return axLUT + + +# =================================================================================== +# TRANSIAXIAL LUTS +def get_txLUT(Cnt): + # number of bins per sinogram angle + NSBINS = Cnt['NSBINS'] + # number of sinogram angles + NSANGLES = Cnt['NSANGLES'] + # number of rings + NCRS = Cnt['NCRS'] + # crystal to sinogram index lookup table (LUT) + c2s = np.zeros((NCRS, NCRS), dtype=np.int32) + # sinogram to crystal index LUT + s2c = np.zeros((NSANGLES * NSBINS, 2), dtype=np.int16) + for c0 in range(NCRS): + for c1 in range(NCRS): + if ((NCRS // 2) <= (c0 + c1)) and ((c0 + c1) < (3 * NCRS // 2)): + iw = (NSBINS-1) // 2 + (c0 - c1 - NCRS//2) + else: + iw = (NSBINS-1) // 2 - (c0 - c1 - NCRS//2) + if (iw >= 0) and (iw <= (NSBINS - 1)): + ia = ((c0 + c1 + NCRS//2) % NCRS) // 2 + aw = ia + NSANGLES*iw + c2s[c1, c0] = aw + c2s[c0, c1] = aw + s2c[aw, 0] = c0 + s2c[aw, 1] = c1 + else: + c2s[c1, c0] = -1 + c2s[c0, c1] = -1 + + txLUT = {'c2s': c2s, 's2c': s2c} + return txLUT + + +def init_sig(pthfn): + + # get the constants for the mMR + Cnt = get_sig_constants(pthfn) + + # transaxial look up tables + txLUT = get_txLUT(Cnt) + + # axial look up tables + axLUT = get_axLUT(Cnt) + + return Cnt, txLUT, axLUT diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index cff4e6c0..b76d7fb6 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -155,9 +155,8 @@ def getinterfile_off(fmu, Cnt, Offst=OFFSET_DEFAULT): accounting for image offset (does slow interpolation). ''' # pead the image file - f = open(fmu, 'rb') - mu = np.fromfile(f, np.float32) - f.close() + with open(fmu, 'rb') as f: + mu = np.fromfile(f, np.float32) # save_im(mur, Cnt, os.path.dirname(fmu) + '/mur.nii') # ------------------------------------------------------------------------- @@ -179,9 +178,8 @@ def getinterfile_off(fmu, Cnt, Offst=OFFSET_DEFAULT): def getinterfile(fim, Cnt): '''Return the floating point image file in an array from an Interfile file.''' # pead the image file - f = open(fim, 'rb') - im = np.fromfile(f, np.float32) - f.close() + with open(fim, 'rb') as f: + im = np.fromfile(f, np.float32) # pumber of voxels nvx = im.shape[0] @@ -1016,9 +1014,8 @@ def hmu_offset(hdr): def rd_hmu(fh): # --read hdr file-- - f = open(fh, 'r') - hdr = f.read() - f.close() + with open(fh, 'r') as f: + hdr = f.read() # ----------------- # pegular expression to find the file name p = re.compile(r'(?<=:=)\s*\w*[.]\w*') @@ -1026,9 +1023,8 @@ def rd_hmu(fh): i1 = i0 + hdr[i0:].find('\n') fbin = p.findall(hdr[i0:i1])[0] # --read img file-- - f = open(os.path.join(os.path.dirname(fh), fbin.strip()), 'rb') - im = np.fromfile(f, np.float32) - f.close() + with open(os.path.join(os.path.dirname(fh), fbin.strip()), 'rb') as f: + im = np.fromfile(f, np.float32) # ----------------- return hdr, im diff --git a/niftypet/nipet/lm_sig/CMakeLists.txt b/niftypet/nipet/lm_sig/CMakeLists.txt new file mode 100644 index 00000000..f1b056f8 --- /dev/null +++ b/niftypet/nipet/lm_sig/CMakeLists.txt @@ -0,0 +1,28 @@ +project(lmproc_sig) + +file(GLOB SRC LIST_DIRECTORIES false "src/*.cu") +include_directories(src) +include_directories(${Python3_INCLUDE_DIRS}) +include_directories(${Python3_NumPy_INCLUDE_DIRS}) +include_directories(${HDF5_INCLUDE_DIRS}) + +add_library(${PROJECT_NAME} ${LIB_TYPE} ${SRC}) +add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) +target_include_directories(${PROJECT_NAME} PUBLIC + "$" + "$") +target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} ${HDF5_LIBRARIES} CUDA::cudart_static CUDA::curand_static) + +if(SKBUILD) +python_extension_module(${PROJECT_NAME}) +endif() +set_target_properties(${PROJECT_NAME} PROPERTIES + VERSION ${CMAKE_PROJECT_VERSION} + SOVERSION ${CMAKE_PROJECT_VERSION_MAJOR} + INTERFACE_${PROJECT_NAME}_MAJOR_VERSION ${CMAKE_PROJECT_VERSION_MAJOR}) +set_property(TARGET ${PROJECT_NAME} APPEND PROPERTY COMPATIBLE_INTERFACE_STRING ${PROJECT_NAME}_MAJOR_VERSION) +install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}Targets + INCLUDES DESTINATION niftypet/${CMAKE_PROJECT_NAME}/include + LIBRARY DESTINATION niftypet/${CMAKE_PROJECT_NAME}/lm_sig) +install(EXPORT ${PROJECT_NAME}Targets FILE NiftyPET${PROJECT_NAME}Targets.cmake + NAMESPACE NiftyPET:: DESTINATION niftypet/${CMAKE_PROJECT_NAME}/cmake) diff --git a/niftypet/nipet/lm_sig/__init__.py b/niftypet/nipet/lm_sig/__init__.py new file mode 100644 index 00000000..9179c661 --- /dev/null +++ b/niftypet/nipet/lm_sig/__init__.py @@ -0,0 +1,2 @@ +__all__ = ['hst_sig', 'lmproc_sig'] +from . import hst_sig, lmproc_sig diff --git a/niftypet/nipet/lm_sig/hst_sig.py b/niftypet/nipet/lm_sig/hst_sig.py new file mode 100644 index 00000000..a823ef5a --- /dev/null +++ b/niftypet/nipet/lm_sig/hst_sig.py @@ -0,0 +1,258 @@ +import logging +import os +from textwrap import dedent + +import h5py +import numpy as np +import scipy.ndimage as ndi + +# import the C-extension with CUDA +from . import lmproc_sig + +log = logging.getLogger(__name__) + + +def lminfo_sig(datain, Cnt, t0=0, t1=0): + + if not os.path.isfile(datain['lm_h5']): + raise IOError('LM HDF5 file not found!') + + with h5py.File(datain['lm_h5'], 'r') as f: + + if (f['HeaderData']['ListHeader']['isListCompressed'][0]) > 0: + raise IOError('The list mode data is compressed \ + and has to be first decompressed using GE proprietary software!') + + else: + log.debug('the list mode is decompressed [OK]') + + lm = f['ListData']['listData'] + + # find first time marker by reading first k=1 time markers + # event offset + eoff = 0 + # direction of time search: 1-forward + dsearch = 1 + # how many t-markers forward? + k_markers = 1 + + # first time marker + eoff_start, tstart, _ = lmproc_sig.nxtmrkr(datain['lm_h5'], Cnt['BPE'], eoff, k_markers, + dsearch) + + # last time marker + eoff_end, tend, _ = lmproc_sig.nxtmrkr(datain['lm_h5'], Cnt['BPE'], + (lm.shape[0] // Cnt['BPE']) - Cnt['BPE'], 1, -1) + + # total number of elements in the list mode data + totele = lm.shape[0] // Cnt['BPE'] + + # offset for first events + eoff_first = 0 + + # last event offset + eoff_last = totele - 1 + + if not t0 == t1 == 0: + + # update the times by the offset if it is greater than 0 + t1 += tstart // Cnt['ITIME'] + t0 += tstart // Cnt['ITIME'] + + if (t1 * Cnt['ITIME']) > tend: + t1 = (tend + Cnt['ITIME'] - 1) // Cnt['ITIME'] + + if (t0 * Cnt['ITIME']) <= tstart: + t0 = tstart // Cnt['ITIME'] + + log.debug('t0 = {}, t1 = {}'.format(t0, t1)) + + def find_tmark(t, tstart, tend, eoff_start, eoff_end, lmpth, bpe): + ''' + find the event offsets for time index t + to be used for list mode data processing + ''' + + trgt = int(t * Cnt['ITIME']) + + if trgt < tstart: + trgt = tstart + + if trgt > tend: + trgt = tend + + log.debug('target t_marker: {}'.format(trgt)) + + k_markers = 100 + eoff, tmrk, counts = lmproc_sig.nxtmrkr(lmpth, bpe, 0, k_markers, 1) + + # average recorded events per ms + epm = eoff / k_markers + + flg_done = False + while (abs(tmrk - trgt) > 10) or flg_done: + + skip_off = int(eoff + (trgt-tmrk) * epm) # + eoff_start + if skip_off >= eoff_end: + skip_off = int(totele - 0.25*epm*bpe) + log.debug('corrected offset to: {}'.format(skip_off)) + + if skip_off < eoff_start: + skip_off = int(eoff_start + bpe) + log.debug('corrected offset to: {}'.format(skip_off)) + + eoff_n, tmrk_n, _ = lmproc_sig.nxtmrkr(lmpth, bpe, skip_off, 1, + np.sign(trgt - tmrk)) + + if (tmrk_n == tmrk): + flg_done = True + else: + epm = (eoff_n-eoff) / (tmrk_n-tmrk) + + eoff = eoff_n + tmrk = tmrk_n + + log.debug('t_mark: {}'.format(tmrk)) + + # import pdb; pdb.set_trace() + if tmrk != trgt: + eoff, tmrk, _ = lmproc_sig.nxtmrkr(lmpth, bpe, eoff, abs(trgt - tmrk), + np.sign(trgt - tmrk)) # +1*((trgt-tmrk)<0) + + return eoff, tmrk + + # start + eoff0, tmrk0 = find_tmark(t0, tstart, tend, eoff_start, eoff_end, datain['lm_h5'], + Cnt['BPE']) + # stop + eoff1, tmrk1 = find_tmark(t1, tstart, tend, eoff_start, eoff_end, datain['lm_h5'], + Cnt['BPE']) + + # number of elements to be considered in the list mode data + ele = eoff1 - eoff0 + + else: + + eoff0 = eoff_first + eoff1 = eoff_last + + tmrk0 = tstart + tmrk1 = tend + + # number of elements to be considered in the list mode data + ele = totele + + # integration time tags (+1 for the end) + nitag = ((tmrk1-tmrk0) + Cnt['ITIME'] - 1) // Cnt['ITIME'] + + # update real time markers in seconds + t0 = tmrk0 // Cnt['ITIME'] + t1 = tmrk1 // Cnt['ITIME'] + + log.info( + dedent('''\ + ----------------------------------------------- + > the first time is: {}s at event address: {} + > the last time is: {}s at event address: {} + ------------------------------------------------ + > the start time is: {}s at event address: {} (used as offset) + > the stop time is: {}s at event address: {} + > the number of report itags is: {} + > ----------------------------------------------- + '''.format(tstart / Cnt['ITIME'], eoff_start, tend / Cnt['ITIME'], eoff_end, + tmrk0 / Cnt['ITIME'], eoff0, tmrk1 / Cnt['ITIME'], eoff1, nitag))) + + return { + 'nitag': nitag, 'nele': ele, 'totele': totele, 'tm0': tmrk0, 'tm1': tmrk1, + 'evnt_addr0': eoff0, 'evnt_addr1': eoff1, 'toff': tstart, 'tend': tend} + + +# =============================================================================== +# HISTOGRAM THE LIST-MODE DATA +FRMS = np.array([0], dtype=np.uint16) + + +def hist(datain, txLUT, axLUT, Cnt, frms=FRMS, use_stored=False, hst_store=False, t0=0, t1=0, + cmass_sig=5): + + # histogramming with bootstrapping: + # Cnt['BTP'] = 0: no bootstrapping [default]; + # Cnt['BTP'] = 2: parametric bootstrapping (using Poisson distribution with mean = 1) + + # gather info about the LM time tags + lmdct = lminfo_sig(datain, Cnt, t0, t1) + + # ==================================================================== + # SETTING UP CHUNKS + # divide the data into data chunks + # the default is to read around 1GB to be dealt with all streams (default: 32) + nchnk = (lmdct['nele'] + Cnt['ELECHNK'] - 1) // Cnt['ELECHNK'] + log.info('''\ + \r> duration by integrating time tags [s]: {} + \r> # chunks of data (initial): {} + \r> # elechnk: {}', + '''.format(lmdct['nitag'], nchnk, Cnt['ELECHNK'])) + + # divide the list mode data into chunks in terms of addresses of selected time tags + # break time tag + btag = np.zeros((nchnk + 1), dtype=np.int32) + + # address (position) in file (in bpe-bytes unit) + atag = np.zeros((nchnk + 1), dtype=np.uint64) + + # elements per thread to be dealt with + ele4thrd = np.zeros((nchnk), dtype=np.int32) + + # elements per data chunk + ele4chnk = np.zeros((nchnk), dtype=np.int32) + + # byte values for the whole event + bval = np.zeros(Cnt['BPE'], dtype=int) + + atag[0] = lmdct['evnt_addr0'] + btag[0] = 0 + + # LM data properties in a dictionary + lmprop = { + 'lmfn': datain['lm_h5'], 'bpe': Cnt['BPE'], 'nele': lmdct['nele'], 'nchk': nchnk, + 'nitg': lmdct['nitag'], 'toff': lmdct['toff'], 'tend': lmdct['tend'], 'tm0': lmdct['tm0'], + 'tm1': lmdct['tm1'], 'atag': atag, 'btag': btag, 'ethr': ele4thrd, 'echk': ele4chnk, + 'LOG': Cnt['LOG']} + + # get the setup into + lmproc_sig.lminfo(lmprop) + + # import pdb; pdb.set_trace() + + # --------------------------------------- + # preallocate all the output arrays + if (lmdct['nitag'] > Cnt['MXNITAG']): tn = Cnt['MXNITAG'] // (1 << Cnt['VTIME']) + else: tn = lmdct['nitag'] // (1 << Cnt['VTIME']) + + # sinogram projection views (sort timre frames govern by VTIME) + pvs = np.zeros((tn, 2 * Cnt['NRNG'] - 1, Cnt['NSBINS']), dtype=np.uint32) + # prompt head curve (counts per second) + phc = np.zeros((lmdct['nitag']), dtype=np.uint32) + # centre of mass of radiodistribution (axially only) + mss = np.zeros((lmdct['nitag']), dtype=np.float32) + # prompt sinogram + psino = np.zeros((Cnt['NRNG'] * Cnt['NRNG'], Cnt['NSBINS'], Cnt['NSANGLES']), dtype=np.uint32) + hstout = {'phc': phc, 'mss': mss, 'pvs': pvs, 'psn': psino} + + # do the histogramming and processing + lmproc_sig.hist(hstout, lmprop, frms, txLUT, axLUT, Cnt) + + # unpack short (interval) sinogram projection views + pvs_sgtl = np.array(hstout['pvs'] >> 8, dtype=float) + pvs_sgtl = pvs_sgtl[:, ::-1, :] + pvs_crnl = np.array(np.bitwise_and(hstout['pvs'], 255), dtype=float) + pvs_crnl = pvs_crnl[:, ::-1, :] + + cmass = 1 * ndi.filters.gaussian_filter(hstout['mss'], cmass_sig, mode='mirror') + # apply the axial dimensions in [cm] to the centre of mass + cmass = cmass * Cnt['AXFOV'] / Cnt['NSEG0'] + + hst = { + 'pvs_sgtl': pvs_sgtl, 'pvs_crnl': pvs_crnl, 'cmass': cmass, 'phc': hstout['phc'], + 'psino': np.transpose(hstout['psn'], (0, 2, 1)), 'dur': lmdct['nitag'], 'lmprop': lmprop} + return hst diff --git a/niftypet/nipet/lm_sig/src/hst_sig.cu b/niftypet/nipet/lm_sig/src/hst_sig.cu new file mode 100644 index 00000000..d1184f36 --- /dev/null +++ b/niftypet/nipet/lm_sig/src/hst_sig.cu @@ -0,0 +1,411 @@ +/*------------------------------------------------------------------------ +CUDA C extention for Python +Provides functionality for histogramming and processing list-mode data. + +author: Pawel Markiewicz +Copyrights: 2016, University College London +------------------------------------------------------------------------*/ + +#include "hst_sig.h" + +__constant__ short c_r2s[NRNG_S * NRNG_S]; + +__inline__ __device__ int tofBin(short d) { + short delta = d >> 7; + if ((delta < TAU1_S) || (delta > -TAU0_S)) { return (TAU0_S + delta) / TOFC_S; } + return -1; +} + +__inline__ __device__ unsigned char get_ssrbi(unsigned short d0, unsigned short d1) { + return (unsigned char)((d0 & 0x3f) + (d1 & 0x3f)); +} + +// __inline__ __device__ +// int sinoIdx(unsigned short d0, unsigned short d1){ + +// short2 r; +// r.x = d0&0x3f; +// r.y = d1&0x3f; + +// char rdiff = r.x - r.y; +// char rsum = r.x + r.y; + +// if (rdiff>1){ +// char angle = rdiff/2; +// if ( angle<=MRD_S/2 ) +// return rsum + (4*angle-2)*NRNG_S - (4*angle*angle - 1); +// } +// else if (rdiff<-1){ +// char angle = -rdiff/2; +// if (angle<=MRD_S/2) +// return rsum + (4*angle)*NRNG_S - ((angle+1)*4*angle); +// } +// else +// { +// return rsum; +// } +// return -1; +// } + +// __inline__ __device__ +// short2 sinoCrd(unsigned short d0, unsigned short d1) +// { +// short2 c; +// c.x = d0>>6; +// c.y = d1>>6; + +// short csum = c.x+c.y; + +// //radial bin index +// short iw; +// //angle index +// short ia; + +// if ( ((NCRS_S/2)<=csum) && ((3*NCRS_S/2)>csum) ){ +// iw = NSBINS_S/2 + (c.x-c.y-NCRS_S/2); +// } +// else{ +// iw = NSBINS_S/2 - (c.x-c.y-NCRS_S/2); +// } + +// ia = ((csum + NCRS_S/2)%NCRS_S)/2; + +// if ((iw < 0) || (iw >= NSBINS_S)) { +// printf("ed> sinogram index calculation failed!\n"); +// return make_short2(-1, -1); +// } +// return make_short2(ia, iw); +// //(ia + NSANGLES_S*iw) + NSANGLES_S*NSBINS_S*get_sni(d0, d1); +// } + +//===================================================================== +__global__ void hst(ushort3 *lm, unsigned int *rprmt, unsigned int *mass, unsigned int *pview, + unsigned int *sino, int *c2s, const int ele4thrd, const int elm, const int off, + const int tstart, const int tstop) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + int i_start, i_stop; + if (idx == (BTHREADS * NTHREADS - 1)) { + i_stop = off + elm; + i_start = off + (BTHREADS * NTHREADS - 1) * ele4thrd; + } else { + i_stop = off + (idx + 1) * ele4thrd; + i_start = off + idx * ele4thrd; + } + + // find the first time tag in this thread patch + int itag; // integration time tag + int i = i_start; + int tag = 0; + while (tag == 0) { + if ((lm[i].x & 0x7f) == 1) { + tag = 1; + itag = ((1 << 16) * lm[i].z + lm[i].y - tstart) / ITIME; // assuming that the tag is every + // 1ms + } + i++; + } + //*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*- + for (int i = i_start; i < i_stop; i++) { + if ((itag >= 0) && (itag < (tstop - tstart) / ITIME)) { + if ((lm[i].x & 0x7) == 5) { + // event (default 1, but for bootstrap can be 0,1,2,3...) + char Nevnt = 1; + // head curve + atomicAdd(rprmt + itag, 1); + // prompt sinogram + + // increment this: sino + txIdx + axIdx. Crystals and rings are converted to transaxial + // and axial sino indices + int aw = c2s[(lm[i].y >> 6) + (lm[i].z >> 6) * NCRS_S]; + atomicAdd(sino + aw + + NSANGLES_S * NSBINS_S * c_r2s[(lm[i].y & 0x3f) + (lm[i].z & 0x3f) * NRNG_S], + 1); + + // TOF bin index + int itof = tofBin(lm[i].x); + if (itof < 0) { + printf("eg> calculation of TOF index failed.\n"); + return; + } + // SSRB index + unsigned char ssri = get_ssrbi(lm[i].y, lm[i].z); + if (ssri > SEG0_S) { + printf("eg> calculation of SSRB index failed.\n"); + return; + } + // centre of mass + atomicAdd(&mass[itag], ssri); + // projection views + short wi = aw / NSANGLES_S; + short ai = aw - wi * NSANGLES_S; + short a0 = ai == 0 || ai == 223; + short a126 = ai == 112 || ai == 111; + if ((a0 || a126) && (itag < MXNITAG)) { + atomicAdd(pview + (itag >> VTIME) * SEG0_S * NSBINS_S + ssri * NSBINS_S + wi, + Nevnt << (a126 * 8)); + } + } else if ((lm[i].x & 0x7f) == 1) { + itag = ((1 << 16) * lm[i].z + lm[i].y - tstart) / ITIME; + } + // else if( (lm[i].x&0x7f)==0x19 ){ + // printf("u> t[%d]: %d - %d - %d \n", itag, lm[i].z, lm[i].y, lm[i].x); + // } + } + } + //*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*- +} + +//================================================================================ +//***** general variables used for streams +int ichnk; // indicator of how many chunks have been processed in the GPU. +int nchnkrd; // indicator of how many chunks have been read from disk. +int dataready[NSTREAMS]; +uint8_t *lmbuff; // data buffer +LMprop lmprop; +//================================================================================ + +//************ CHECK DEVICE MEMORY USAGE ********************* +void getMemUse(void) { + size_t free_mem; + size_t total_mem; + HANDLE_ERROR(cudaMemGetInfo(&free_mem, &total_mem)); + double free_db = (double)free_mem; + double total_db = (double)total_mem; + double used_db = total_db - free_db; + + if (lmprop.log <= LOGDEBUG) + printf("\ni> current GPU memory usage: %7.2f/%7.2f [MB]\n", used_db / 1024.0 / 1024.0, + total_db / 1024.0 / 1024.0); +} +//************************************************************ + +//================================================================================================ +//***** Stream Callback ***** +void CUDART_CB MyCallback(cudaStream_t stream, cudaError_t status, void *data) { + int i = (int)(size_t)data; + + if (lmprop.log <= LOGDEBUG) { + printf("+> stream[%d]: ", i); + printf("%d chunks of data are DONE. ", ichnk + 1); + } + + ichnk += 1; + if (nchnkrd < lmprop.nchnk) { + + herr_t hstatus; + H5setup h5set; + // start address for reading into the host buffer + h5set.start[0] = lmprop.bpe * (hsize_t)lmprop.atag[nchnkrd]; + // init with number of bytes to be read into the data chunk buffer + h5set = initHDF5(h5set, lmprop.fname, lmprop.bpe * (hsize_t)lmprop.ele4chnk[nchnkrd]); + if (h5set.status < 0) { + printf("e> Cannot initialise reading the HDF5 dataset (CUDART callback)!\n"); + return; + } + // prepare chunk + h5set.count[0] = lmprop.bpe * (hsize_t)lmprop.ele4chnk[nchnkrd]; + h5set.memspace = H5Screate_simple(h5set.rank, &h5set.count[0], NULL); + // select the chunk (slab) + hstatus = H5Sselect_hyperslab(h5set.dspace, H5S_SELECT_SET, &h5set.start[0], &h5set.stride[0], + &h5set.count[0], NULL); + if (hstatus < 0) { + printf("e> error selecting the HDF5 slab!\n"); + return; + } + // read the chunk + hstatus = H5Dread(h5set.dset, h5set.dtype, h5set.memspace, h5set.dspace, H5P_DEFAULT, + (void *)&lmbuff[i * ELECHNK_S * lmprop.bpe]); + if (hstatus < 0) { + printf("e> error reading HDF5 slab!\n"); + return; + } + + if (lmprop.log <= LOGDEBUG) { + printf("\n\t<> %d/%d data chunk (%luB) has been read from address: %lu\n", nchnkrd + 1, + lmprop.nchnk, (H5Sget_select_npoints(h5set.dspace)), h5set.start[0]); + printf("\n\t ele4chnk[%d]=%d", nchnkrd, lmprop.ele4chnk[nchnkrd]); + } + + // set a flag: stream[i] is free now and the new data is ready. + dataready[i] = 1; + nchnkrd += 1; + } else { + if (lmprop.log <= LOGDEBUG) printf("\n"); + } +} + +//================================================================================ +void gpu_hst(LMprop _lmprop, unsigned int *d_rprmt, unsigned int *d_mass, unsigned int *d_pview, + unsigned int *d_sino, int *d_c2s, short *r2s) { + // copy the LM properties to the global variable. + lmprop = _lmprop; + + // ring to sino index LUT to constant memory + cudaMemcpyToSymbol(c_r2s, r2s, NRNG_S * NRNG_S * sizeof(short)); + + // allocate mem for the list mode file + ushort3 *d_lmbuff; + HANDLE_ERROR( + cudaMallocHost((void **)&lmbuff, NSTREAMS * ELECHNK_S * (size_t)lmprop.bpe)); // host pinned + HANDLE_ERROR(cudaMalloc((void **)&d_lmbuff, NSTREAMS * ELECHNK_S * sizeof(ushort3))); // device + + if (lmprop.log <= LOGDEBUG) + printf("\ni> creating %d CUDA streams... ", min(NSTREAMS, lmprop.nchnk)); + + cudaStream_t stream[min(NSTREAMS, lmprop.nchnk)]; + for (int i = 0; i < min(NSTREAMS, lmprop.nchnk); ++i) HANDLE_ERROR(cudaStreamCreate(&stream[i])); + + if (lmprop.log <= LOGDEBUG) printf("DONE.\n"); + + // ****** check memory usage + getMemUse(); + //******* + + //__________________________________________________________________________________________________ + ichnk = 0; // indicator of how many chunks have been processed in the GPU. + nchnkrd = 0; // indicator of how many chunks have been read from disk. + + if (lmprop.log <= LOGDEBUG) + printf("\ni> reading the first LM chunks from HDF5 file:\n %s ", lmprop.fname); + + //---SETTING UP HDF5--- + herr_t status; + H5setup h5set; + + // init with number of bytes to be read into the data chunk buffer + h5set = initHDF5(h5set, lmprop.fname, lmprop.bpe * (hsize_t)lmprop.ele4chnk[nchnkrd]); + if (h5set.status < 0) { + printf("e> Cannot initialise reading the HDF5 dataset!\n"); + return; + } + // temporarily close it + status = H5Sclose(h5set.memspace); + + for (int i = 0; i < min(NSTREAMS, lmprop.nchnk); i++) { + + // start address for reading into the host buffer + h5set.start[0] = lmprop.bpe * (hsize_t)lmprop.atag[nchnkrd]; + + // prepare chunk + h5set.count[0] = lmprop.bpe * (hsize_t)lmprop.ele4chnk[nchnkrd]; + h5set.memspace = H5Screate_simple(h5set.rank, &h5set.count[0], NULL); + + status = H5Sselect_hyperslab(h5set.dspace, H5S_SELECT_SET, &h5set.start[0], &h5set.stride[0], + &h5set.count[0], NULL); + if (status < 0) { + printf("e> error selecting the HDF5 slab!\n"); + return; + } + + status = H5Dread(h5set.dset, h5set.dtype, h5set.memspace, h5set.dspace, H5P_DEFAULT, + (void *)&lmbuff[i * ELECHNK_S * lmprop.bpe]); + if (status < 0) { + printf("e> error reading HDF5 slab!\n"); + return; + } + + if (lmprop.log <= LOGDEBUG) { + printf("\ni> %d-th LM data chunk (%lu B) has been read from address: %lu", i, + (H5Sget_select_npoints(h5set.dspace)), h5set.start[0]); + printf("\nele4chnk[%d]=%d", nchnkrd, lmprop.ele4chnk[nchnkrd]); + } + + // h5set.start[0] += (hsize_t) lmprop.bpe * lmprop.ele4chnk[nchnkrd]; + + // stream[i] can start processing the data + dataready[i] = 1; + nchnkrd += 1; + } + + status = H5Sclose(h5set.memspace); + status = H5Tclose(h5set.dtype); + status = H5Sclose(h5set.dspace); + status = H5Dclose(h5set.dset); + status = H5Fclose(h5set.file); + + if (lmprop.log <= LOGDEBUG) printf("\ni> done reading the data from HDF5 file.\n\n"); + + // change it to unsigned short + unsigned short *lm = (unsigned short *)lmbuff; + + // for(int i=0; i<20; i++){ + // printf("[%d]: %d, %d, %d, %d, %d, %d,\n", i, + // lm[i*3+0]&0xff, lm[i*3+0]>>8, + // lm[i*3+1]&0xff, lm[i*3+1]>>8, + // lm[i*3+2]&0xff, lm[i*3+2]>>8 ); + // } + // return; + + if (lmprop.log <= LOGINFO) printf("+> histogramming the LM data:\n"); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + //============================================================================ + for (int n = 0; n < lmprop.nchnk; n++) { // + + //***** launch the next free stream ****** + int si, busy = 1; + while (busy == 1) { + for (int i = 0; i < min(NSTREAMS, lmprop.nchnk); i++) { + if ((cudaStreamQuery(stream[i]) == cudaSuccess) && (dataready[i] == 1)) { + busy = 0; + si = i; + + if (lmprop.log <= LOGDEBUG) + printf(" i> stream[%d] was free for %d-th chunk.\n", si, n + 1); + + break; + } + // else{printf("\n >> stream %d was busy at %d-th chunk. \n", i, n);} + } + } + //****** + + // set a flag: stream[i] is busy now with processing the data. + dataready[si] = 0; + // // reinterpret the LM buffer into short + // short *lmbuff_s = (short*) lmbuff; + HANDLE_ERROR(cudaMemcpyAsync(&d_lmbuff[si * ELECHNK_S], &lm[si * ELECHNK_S * 3], + lmprop.ele4chnk[n] * sizeof(ushort3), cudaMemcpyHostToDevice, + stream[si])); + + hst<<>>(d_lmbuff, d_rprmt, d_mass, d_pview, d_sino, d_c2s, + lmprop.ele4thrd[n], lmprop.ele4chnk[n], + si * ELECHNK_S, lmprop.tstart, lmprop.tstop); + + if (lmprop.log <= LOGDEBUG) + printf("chunk[%d], stream[%d], ele4thrd[%d], ele4chnk[%d]\n", n, si, lmprop.ele4thrd[n], + lmprop.ele4chnk[n]); + + cudaStreamAddCallback(stream[si], MyCallback, (void *)(size_t)si, 0); + } + //============================================================================ + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + + if (lmprop.log <= LOGINFO) printf("+> histogramming DONE in %fs.\n", 0.001 * elapsedTime); + + cudaDeviceSynchronize(); + + //______________________________________________________________________________________________________ + + //***** close things down ***** + for (int i = 0; i < min(NSTREAMS, lmprop.nchnk); ++i) { + // printf("--> checking stream[%d], %s\n",i, cudaGetErrorName( cudaStreamQuery(stream[i]) )); + HANDLE_ERROR(cudaStreamDestroy(stream[i])); + } + + cudaFreeHost(lmbuff); + cudaFree(d_lmbuff); + + return; +} diff --git a/niftypet/nipet/lm_sig/src/hst_sig.h b/niftypet/nipet/lm_sig/src/hst_sig.h new file mode 100644 index 00000000..db776244 --- /dev/null +++ b/niftypet/nipet/lm_sig/src/hst_sig.h @@ -0,0 +1,25 @@ +#ifndef HST_H +#define HST_H + +#include +#include + +#include +#include +#include + +#include "def.h" +#include "lmproc_sig.h" +#include "scanner_0.h" + +void gpu_hst(LMprop _lmprop, unsigned int *d_rprmt, unsigned int *d_mass, unsigned int *d_pview, + unsigned int *d_sino, int *d_c2s, short *r2s); + +#define min(a, b) \ + ({ \ + __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) + +#endif diff --git a/niftypet/nipet/lm_sig/src/lm_sig_module.cu b/niftypet/nipet/lm_sig/src/lm_sig_module.cu new file mode 100644 index 00000000..c4b4e67a --- /dev/null +++ b/niftypet/nipet/lm_sig/src/lm_sig_module.cu @@ -0,0 +1,547 @@ +/*------------------------------------------------------------------------ +CUDA C extention for Python +Provides functionality for list-mode data processing including histogramming +QC and random estimation. + +author: Pawel Markiewicz +Copyrights: 2019 +------------------------------------------------------------------------*/ +#define PY_SSIZE_T_CLEAN +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // NPY_API_VERSION + +#include "def.h" +#include "hdf5.h" +#include "lmproc_sig.h" +#include "scanner_0.h" +#include +#include +#include + +//=== START PYTHON INIT === + +//--- Available functions +static PyObject *find_tmarker(PyObject *self, PyObject *args); +static PyObject *lminfo(PyObject *self, PyObject *args); +static PyObject *hist(PyObject *self, PyObject *args); +//--- + +//> Module Method Table +static PyMethodDef lmproc_sig_methods[] = { + {"nxtmrkr", find_tmarker, METH_VARARGS, "get the next time marker in LM data."}, + {"lminfo", lminfo, METH_VARARGS, "get the time info about the LM data."}, + {"hist", hist, METH_VARARGS, "process the LM data using CUDA streams."}, + {NULL, NULL, 0, NULL} // Sentinel +}; + +//> Module Definition Structure +static struct PyModuleDef lmproc_sig_module = { + PyModuleDef_HEAD_INIT, + "lmproc_sig", //> name of module + //> module documentation, may be NULL + "This module provides an interface for GE Signa list-mode processing using GPU routines.", + -1, //> the module keeps state in global variables. + lmproc_sig_methods}; + +//> Initialization function +PyMODINIT_FUNC PyInit_lmproc_sig(void) { + + Py_Initialize(); + + //> load NumPy functionality + import_array(); + + return PyModule_Create(&lmproc_sig_module); +} + +//====================================================================================== + +//====================================================================================== +// P R O C E S I N G L I S T M O D E D A T A +//-------------------------------------------------------------------------------------- +// GE Signa + +//- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +static PyObject *find_tmarker(PyObject *self, PyObject *args) { + // GE Signa function acting on list-mode data file (HDF5). Finds the next time marker. + + // path to LM file + char *flm; + + // bpe-byte event offset (bpe: bytes per event) + unsigned long long evntOff; + + // finds k*markers forward or backward. + unsigned long long k_markers; + + // direction of search (forward or backward) + int dsearch; + + // number of bytes per event + int bpe; + + herr_t status; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "siKKi", &flm, &bpe, &evntOff, &k_markers, &dsearch)) return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + // byte values for the whole event + uint8_t *bval = (uint8_t *)malloc(bpe * sizeof(uint8_t)); + ; + + hsize_t start[1]; + hsize_t count[1]; + hsize_t stride[1] = {1}; + count[0] = (hsize_t)bpe; + + hid_t H5file = H5Fopen(flm, H5F_ACC_RDONLY, H5P_DEFAULT); + if (H5file < 0) { + printf("ce> could not open the HDF5 file!\n"); + return NULL; + } + hid_t dset = H5Dopen(H5file, LMDATASET_S, H5P_DEFAULT); + if (dset < 0) { + printf("ce> could not open the list-mode dataset!\n"); + return NULL; + } + + hid_t dtype = H5Dget_type(dset); + hid_t dspace = H5Dget_space(dset); + int rank = H5Sget_simple_extent_ndims(dspace); + hid_t memspace = H5Screate_simple(rank, &count[0], NULL); + + // int rank = H5Sget_simple_extent_ndims (dspace); + // hsize_t dims[rank]; + // hsize_t maxDims[rank]; + // rank = H5Sget_simple_extent_dims (dspace, &dims[0], &maxDims[0]); + // printf("ci> rank = %d; length = %lu\n", rank, dims[0]); + + int tmarker; + // prompt counts + int pcounts = 0; + // visited time markers + int visit_markers = 0; + + while (visit_markers < k_markers) { + start[0] = (hsize_t)(evntOff * bpe); + status = H5Sselect_hyperslab(dspace, H5S_SELECT_SET, &start[0], &stride[0], &count[0], NULL); + status = H5Dread(dset, dtype, memspace, dspace, H5P_DEFAULT, (void *)bval); + // for (int i=0; i> 3)] << i; } + } + } + // update the event offset in the direction + evntOff += dsearch; + } + + status = H5Sclose(memspace); + status = H5Tclose(dtype); + status = H5Sclose(dspace); + // printf ("H5Sclose: %i\n", status); + status = H5Dclose(dset); + // printf ("H5Dclose: %i\n", status); + status = H5Fclose(H5file); + // printf ("H5Fclose: %i\n", status); + + PyObject *tuple_out = PyTuple_New(3); + PyTuple_SetItem(tuple_out, 0, Py_BuildValue("L", evntOff - 1)); + PyTuple_SetItem(tuple_out, 1, Py_BuildValue("i", tmarker)); + PyTuple_SetItem(tuple_out, 2, Py_BuildValue("i", pcounts)); + return tuple_out; +} + +// ================================================================================ + +static PyObject *lminfo(PyObject *self, PyObject *args) { + + // preallocated dictionary of output arrays + PyObject *o_lmprop; + + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "O", &o_lmprop)) return NULL; + + PyObject *pd_flm = PyDict_GetItemString(o_lmprop, "lmfn"); + // char *flm = (char*) PyUnicode_AS_UNICODE(pd_flm); + char *flm = (char *)PyUnicode_DATA(pd_flm); + + // bytes per event + PyObject *pd_bpe = PyDict_GetItemString(o_lmprop, "bpe"); + int bpe = (int)PyLong_AsLong(pd_bpe); + + PyObject *pd_log = PyDict_GetItemString(o_lmprop, "LOG"); + int log = (int)PyLong_AsLong(pd_log); + + // number of elements (all kinds of events recorded in the LM dataset) + PyObject *pd_ele = PyDict_GetItemString(o_lmprop, "nele"); + uint64_t ele = (uint64_t)PyLong_AsUnsignedLongLongMask(pd_ele); + // number of data chunk to be independently processed by CUDA streams + PyObject *pd_nchk = PyDict_GetItemString(o_lmprop, "nchk"); + uint64_t nchnk = (uint64_t)PyLong_AsUnsignedLongLongMask(pd_nchk); + // number of time tags + PyObject *pd_nitg = PyDict_GetItemString(o_lmprop, "nitg"); + int nitag = (int)PyLong_AsLong(pd_nitg); + + // start time marker + PyObject *pd_tm0 = PyDict_GetItemString(o_lmprop, "tm0"); + int tm0 = (int)PyLong_AsLong(pd_tm0); + // stop time marker + PyObject *pd_tm1 = PyDict_GetItemString(o_lmprop, "tm1"); + int tm1 = (int)PyLong_AsLong(pd_tm1); + + // time offset (the first time marker) + PyObject *pd_toff = PyDict_GetItemString(o_lmprop, "toff"); + int toff = (int)PyLong_AsLong(pd_toff); + // last time marker + PyObject *pd_tend = PyDict_GetItemString(o_lmprop, "tend"); + int last_ttag = (int)PyLong_AsLong(pd_tend); + + if (log <= LOGINFO) { + printf("i> LM file = %s\n", flm); + printf(" # bpe = %d\n", bpe); + printf(" # elements = %lu\n", ele); + printf(" # chunks = %lu\n", nchnk); + printf(" # time tags = %d\n", nitag); + printf(" time start = %d\n", tm0); + printf(" time end = %d\n", tm1); + printf("x time offset = %d\n", toff); + printf("x time end = %d\n", last_ttag); + } + + // address of the event tags (events are 6-bytes minimum) + PyObject *pd_atag = PyDict_GetItemString(o_lmprop, "atag"); + // time tags + PyObject *pd_btag = PyDict_GetItemString(o_lmprop, "btag"); + // elements (all kinds of events) per CUDA thread + PyObject *pd_ethr = PyDict_GetItemString(o_lmprop, "ethr"); + // element per data chunk + PyObject *pd_echk = PyDict_GetItemString(o_lmprop, "echk"); + + PyArrayObject *p_atag = NULL, *p_btag = NULL, *p_ethr = NULL, *p_echk = NULL; + p_atag = (PyArrayObject *)PyArray_FROM_OTF(pd_atag, NPY_UINT64, NPY_ARRAY_INOUT_ARRAY2); + p_btag = (PyArrayObject *)PyArray_FROM_OTF(pd_btag, NPY_INT32, NPY_ARRAY_INOUT_ARRAY2); + p_ethr = (PyArrayObject *)PyArray_FROM_OTF(pd_ethr, NPY_INT32, NPY_ARRAY_INOUT_ARRAY2); + p_echk = (PyArrayObject *)PyArray_FROM_OTF(pd_echk, NPY_INT32, NPY_ARRAY_INOUT_ARRAY2); + + if (p_atag == NULL || p_btag == NULL || p_ethr == NULL || p_echk == NULL) { + PyArray_DiscardWritebackIfCopy(p_atag); + Py_XDECREF(p_atag); + PyArray_DiscardWritebackIfCopy(p_btag); + Py_XDECREF(p_btag); + PyArray_DiscardWritebackIfCopy(p_ethr); + Py_XDECREF(p_ethr); + PyArray_DiscardWritebackIfCopy(p_echk); + Py_XDECREF(p_echk); + return NULL; + } + + off_t *atag = (off_t *)PyArray_DATA(p_atag); + int *btag = (int *)PyArray_DATA(p_btag); + int *ele4thrd = (int *)PyArray_DATA(p_ethr); + int *ele4chnk = (int *)PyArray_DATA(p_echk); + + //* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + // HDF5 stuff + herr_t status; + H5setup h5set; + h5set = initHDF5(h5set, flm, bpe); + if (h5set.status < 0) { + printf("e> HDF5 not set up correctly for read!\n"); + Py_DECREF(p_atag); + Py_DECREF(p_btag); + Py_DECREF(p_ethr); + Py_DECREF(p_echk); + Py_INCREF(Py_None); + return Py_None; + } + //* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + + if (log <= LOGDEBUG) printf("i> setting up data chunks:\n"); + + int i = 0; + char tag = 0; + while ((ele + atag[0] - atag[i]) > ELECHNK_S) { + i += 1; + int c = 0; + tag = 0; + while (tag == 0) { + h5set.start[0] = (hsize_t)((atag[i - 1] + ELECHNK_S - c - 1) * bpe); + status = H5Sselect_hyperslab(h5set.dspace, H5S_SELECT_SET, &h5set.start[0], &h5set.stride[0], + &h5set.count[0], NULL); + status = H5Dread(h5set.dset, h5set.dtype, h5set.memspace, h5set.dspace, H5P_DEFAULT, + (void *)h5set.bval); + // check if time marker + if ((h5set.bval[0]) == 1) { + // set the flag that time tag was found + tag = 1; + // get the time in msec + int itime = 0; + for (int m = 0; m <= 24; m += 8) itime += h5set.bval[2 + (m >> 3)] << m; + btag[i] = itime - tm0; + atag[i] = (atag[i - 1] + ELECHNK_S - c - 1); + ele4chnk[i - 1] = atag[i] - atag[i - 1]; + ele4thrd[i - 1] = (atag[i] - atag[i - 1] + (TOTHRDS - 1)) / TOTHRDS; + } + c += 1; + } + + if (log <= LOGDEBUG) { + printf("i> break time tag [%d] is: %dms at position %lu. \n", i, btag[i], atag[i]); + printf(" # elements: %d/per chunk, %d/per thread. c = %d.\n", ele4chnk[i - 1], + ele4thrd[i - 1], c); + // printf(" > ele-atag: %d, size: %d\n", ele-atag[i], ELECHNK_S); + } + } + + i += 1; + + // add 1ms for any remaining events + btag[i] = tm1 - tm0 + 1; + atag[i] = atag[0] + ele; + ele4thrd[i - 1] = (atag[0] + ele - atag[i - 1] + (TOTHRDS - 1)) / TOTHRDS; + ele4chnk[i - 1] = atag[0] + ele - atag[i - 1]; + + if (log <= LOGDEBUG) { + printf("i> break time tag [%d] is: %dms at position %lu.\n", i, btag[i], atag[i]); + printf(" # elements: %d/per chunk, %d/per thread.\n", ele4chnk[i - 1], ele4thrd[i - 1]); + } + + // Clean up + status = H5Sclose(h5set.memspace); + status = H5Tclose(h5set.dtype); + status = H5Sclose(h5set.dspace); + status = H5Dclose(h5set.dset); + status = H5Fclose(h5set.file); + + PyArray_ResolveWritebackIfCopy(p_atag); + Py_DECREF(p_atag); + PyArray_ResolveWritebackIfCopy(p_btag); + Py_DECREF(p_btag); + PyArray_ResolveWritebackIfCopy(p_ethr); + Py_DECREF(p_ethr); + PyArray_ResolveWritebackIfCopy(p_echk); + Py_DECREF(p_echk); + + Py_INCREF(Py_None); + return Py_None; +} + +// ================================================================================ + +static PyObject *hist(PyObject *self, PyObject *args) { + + // preallocated output dictionary of numpy arrays + PyObject *o_hout; + + // dictionary of input arrays + PyObject *o_lmprop; + + // int tstart, tstop; + + PyObject *o_frames; + + // properties of the LM data + LMprop lmprop; + + // Dictionary of scanner constants + PyObject *o_cnst; + // axial LUTs + PyObject *o_axLUT; + PyObject *o_txLUT; + + // structure of constants + Cnst Cnt; + + // rings to sino index: axial LUT + short *r2s; + // crystals to sino index: transaxial LUT + int *c2s; + + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOO", &o_hout, &o_lmprop, &o_frames, &o_txLUT, &o_axLUT, + &o_cnst)) + return NULL; + + PyObject *pd_flm = PyDict_GetItemString(o_lmprop, "lmfn"); + lmprop.fname = (char *)PyUnicode_DATA(pd_flm); + // number of elements (all kinds of events recorded in the LM dataset) + PyObject *pd_ele = PyDict_GetItemString(o_lmprop, "nele"); + lmprop.ele = (size_t)PyLong_AsUnsignedLongLongMask(pd_ele); + // number of data chunk to be independently processed by CUDA streams + PyObject *pd_nchk = PyDict_GetItemString(o_lmprop, "nchk"); + lmprop.nchnk = (int)PyLong_AsUnsignedLongLongMask(pd_nchk); + // number of time tags + PyObject *pd_nitg = PyDict_GetItemString(o_lmprop, "nitg"); + lmprop.nitag = (int)PyLong_AsLong(pd_nitg); + + // start time marker + PyObject *pd_tm0 = PyDict_GetItemString(o_lmprop, "tm0"); + lmprop.tstart = (int)PyLong_AsLong(pd_tm0); + // stop time marker + PyObject *pd_tm1 = PyDict_GetItemString(o_lmprop, "tm1"); + lmprop.tstop = (int)PyLong_AsLong(pd_tm1); + + // time offset (the first time marker) + PyObject *pd_toff = PyDict_GetItemString(o_lmprop, "toff"); + lmprop.toff = (int)PyLong_AsLong(pd_toff); + // last time marker + PyObject *pd_tend = PyDict_GetItemString(o_lmprop, "tend"); + lmprop.last_ttag = (int)PyLong_AsLong(pd_tend); + + // bootstrap mode + PyObject *pd_btp = PyDict_GetItemString(o_cnst, "BTP"); + Cnt.BTP = (char)PyLong_AsLong(pd_btp); + + // bytes per event + PyObject *pd_bpe = PyDict_GetItemString(o_lmprop, "bpe"); + lmprop.bpe = (int)PyLong_AsLong(pd_bpe); + lmprop.btp = Cnt.BTP; + + PyObject *pd_log = PyDict_GetItemString(o_lmprop, "LOG"); + lmprop.log = (int)PyLong_AsLong(pd_log); + + // //--- start and stop time (IT IS TIME RELATIVE TO THE OFFSET) + // if (lmprop.tstart==lmprop.tstop){ + // lmprop.tstart = 0; + // lmprop.tstop = lmprop.nitag; + // } + + // printf("i> LM file = %s\n", lmprop.fname); + // printf(" # bpe = %d\n", lmprop.bpe); + // printf(" # elements = %lu\n", lmprop.ele); + // printf(" # chkunks = %lu\n", lmprop.nchnk); + // printf(" # time tags = %d\n", lmprop.nitag); + // printf(" time offset = %d\n", lmprop.toff); + // printf(" time end = %d\n", lmprop.last_ttag); + // printf(" tstart = %d\n", lmprop.tstart); + // printf(" tstop = %d\n", lmprop.tstop); + + PyArrayObject *p_atag = NULL, *p_btag = NULL, *p_ethr = NULL, *p_echk = NULL; + + // address of the event tags (events are 6-bytes minimum) + PyObject *pd_atag = PyDict_GetItemString(o_lmprop, "atag"); + p_atag = (PyArrayObject *)PyArray_FROM_OTF(pd_atag, NPY_UINT64, NPY_ARRAY_IN_ARRAY); + // time tags + PyObject *pd_btag = PyDict_GetItemString(o_lmprop, "btag"); + p_btag = (PyArrayObject *)PyArray_FROM_OTF(pd_btag, NPY_INT32, NPY_ARRAY_IN_ARRAY); + // elements (all kinds of events) per CUDA thread + PyObject *pd_ethr = PyDict_GetItemString(o_lmprop, "ethr"); + p_ethr = (PyArrayObject *)PyArray_FROM_OTF(pd_ethr, NPY_INT32, NPY_ARRAY_IN_ARRAY); + // element per data chunk + PyObject *pd_echk = PyDict_GetItemString(o_lmprop, "echk"); + p_echk = (PyArrayObject *)PyArray_FROM_OTF(pd_echk, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_frames = NULL, *p_r2s = NULL, *p_c2s = NULL; + /* Dynamic frames, if one of 0 then static */ + p_frames = (PyArrayObject *)PyArray_FROM_OTF(o_frames, NPY_UINT16, NPY_ARRAY_IN_ARRAY); + + // axial LUTs (rings to sino index LUT): + PyObject *pd_r2s = PyDict_GetItemString(o_axLUT, "r2s"); + p_r2s = (PyArrayObject *)PyArray_FROM_OTF(pd_r2s, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + // transaxial LUTs (crystal to transaxial sino coordinates): + PyObject *pd_c2s = PyDict_GetItemString(o_txLUT, "c2s"); + p_c2s = (PyArrayObject *)PyArray_FROM_OTF(pd_c2s, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // output dictionary hstout + PyArrayObject *p_phc = NULL, *p_mss = NULL, *p_pvs = NULL, *p_psn = NULL; + + // prompts head curve + PyObject *pd_phc = PyDict_GetItemString(o_hout, "phc"); + p_phc = (PyArrayObject *)PyArray_FROM_OTF(pd_phc, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // centre of mass of axial radiodistribution + PyObject *pd_mss = PyDict_GetItemString(o_hout, "mss"); + p_mss = (PyArrayObject *)PyArray_FROM_OTF(pd_mss, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + // projection views (sagittal and coronal) for video + PyObject *pd_pvs = PyDict_GetItemString(o_hout, "pvs"); + p_pvs = (PyArrayObject *)PyArray_FROM_OTF(pd_pvs, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // prompt sino + PyObject *pd_psn = PyDict_GetItemString(o_hout, "psn"); + p_psn = (PyArrayObject *)PyArray_FROM_OTF(pd_psn, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + if (p_atag == NULL || p_btag == NULL || p_ethr == NULL || p_echk == NULL || p_phc == NULL || + p_psn == NULL || p_frames == NULL || p_mss == NULL || p_pvs == NULL || p_r2s == NULL || + p_c2s == NULL) { + Py_XDECREF(p_atag); + Py_XDECREF(p_btag); + Py_XDECREF(p_ethr); + Py_XDECREF(p_echk); + + Py_XDECREF(p_frames); + Py_XDECREF(p_r2s); + Py_XDECREF(p_c2s); + + PyArray_DiscardWritebackIfCopy(p_phc); + Py_XDECREF(p_phc); + PyArray_DiscardWritebackIfCopy(p_mss); + Py_XDECREF(p_mss); + PyArray_DiscardWritebackIfCopy(p_psn); + Py_XDECREF(p_psn); + PyArray_DiscardWritebackIfCopy(p_pvs); + Py_XDECREF(p_pvs); + + return NULL; + } + + lmprop.atag = (size_t *)PyArray_DATA(p_atag); + lmprop.btag = (size_t *)PyArray_DATA(p_btag); + lmprop.ele4thrd = (int *)PyArray_DATA(p_ethr); + lmprop.ele4chnk = (int *)PyArray_DATA(p_echk); + + r2s = (short *)PyArray_DATA(p_r2s); + c2s = (int *)PyArray_DATA(p_c2s); + + /* How many dynamic frames are there? */ + int nfrm = (int)PyArray_DIM(p_frames, 0); + unsigned short *frames = (unsigned short *)PyArray_DATA(p_frames); + + if (lmprop.log <= LOGINFO) printf("i> number of frames: %d\n", nfrm); + + hstout hout; + hout.phc = (unsigned int *)PyArray_DATA(p_phc); + hout.mss = (float *)PyArray_DATA(p_mss); + hout.pvs = (unsigned int *)PyArray_DATA(p_pvs); + + // sinograms + if (nfrm == 1) { + hout.psn = (unsigned int *)PyArray_DATA(p_psn); + } else if (nfrm > 1) { + hout.psn = (unsigned char *)PyArray_DATA(p_psn); + } + + //==================================================================== + lmproc(hout, lmprop, frames, nfrm, r2s, c2s, Cnt); + //==================================================================== + + // Clean up + Py_DECREF(p_atag); + Py_DECREF(p_btag); + Py_DECREF(p_ethr); + Py_DECREF(p_echk); + + Py_DECREF(p_frames); + Py_DECREF(p_r2s); + Py_DECREF(p_c2s); + + PyArray_ResolveWritebackIfCopy(p_psn); + Py_DECREF(p_psn); + PyArray_ResolveWritebackIfCopy(p_phc); + Py_DECREF(p_phc); + PyArray_ResolveWritebackIfCopy(p_pvs); + Py_DECREF(p_pvs); + PyArray_ResolveWritebackIfCopy(p_mss); + Py_DECREF(p_mss); + + Py_INCREF(Py_None); + return Py_None; +} diff --git a/niftypet/nipet/lm_sig/src/lmproc_sig.cu b/niftypet/nipet/lm_sig/src/lmproc_sig.cu new file mode 100644 index 00000000..0e162d99 --- /dev/null +++ b/niftypet/nipet/lm_sig/src/lmproc_sig.cu @@ -0,0 +1,143 @@ +/*------------------------------------------------------------------------ +CUDA C extention for Python +Provides functionality for list-mode data processing including histogramming. + +author: Pawel Markiewicz +Copyrights: 2020, University College London +------------------------------------------------------------------------*/ + +#include "lmproc_sig.h" + +//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* + +void HandleError(cudaError_t err, const char *file, int line) { + if (err != cudaSuccess) { + printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line); + exit(EXIT_FAILURE); + } +} + +//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* + +H5setup initHDF5(H5setup h5set, char *fname, hsize_t bytes) { + h5set.status = -1; // will be cleared to 0 if all is OK + // byte values for a single event + h5set.bval = (uint8_t *)malloc(bytes * sizeof(uint8_t)); + ; + h5set.stride[0] = 1; // always fixed + // count is the bytes + h5set.count[0] = bytes; + // open the HDF5 file for raw data acquisition + h5set.file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT); + if (h5set.file < 0) { + printf("e> could not open the HDF5 file!\n"); + return h5set; + } + // open the dataset of LM data + h5set.dset = H5Dopen(h5set.file, LMDATASET_S, H5P_DEFAULT); + if (h5set.dset < 0) { + printf("e> could not open the list-mode dataset!\n"); + return h5set; + } + // get the data type, data space, LM data rank and memory space. + h5set.dtype = H5Dget_type(h5set.dset); + h5set.dspace = H5Dget_space(h5set.dset); + h5set.rank = H5Sget_simple_extent_ndims(h5set.dspace); + h5set.memspace = H5Screate_simple(h5set.rank, &h5set.count[0], NULL); + h5set.status = 0; + return h5set; +} + +//-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* +void lmproc(hstout hout, LMprop lmprop, unsigned short *frames, int nfrm, short *r2s, int *c2s, + Cnst Cnt) { + + if (lmprop.log <= LOGDEBUG) { + printf("i> frame start time: %d\n", lmprop.tstart); + printf("i> frame stop time: %d\n", lmprop.tstop); + printf("i> # time tags: %d\n", lmprop.nitag); + } + + //--- prompt reports + unsigned int *d_rprmt; + HANDLE_ERROR(cudaMalloc(&d_rprmt, lmprop.nitag * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_rprmt, 0, lmprop.nitag * sizeof(unsigned int))); + //--- + + //--- for motion detection (centre of Mass) + unsigned int *d_mass; + cudaMalloc(&d_mass, lmprop.nitag * sizeof(unsigned int)); + cudaMemset(d_mass, 0, lmprop.nitag * sizeof(unsigned int)); + //--- + + // projection views + unsigned int *d_pview; + // projection views number of elements + int pve = -1; + if (lmprop.nitag > MXNITAG) { + pve = MXNITAG / (1 << VTIME) * SEG0_S * NSBINS_S; + // reduce the sino views to only the first 2 hours + HANDLE_ERROR(cudaMalloc(&d_pview, pve * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_pview, 0, pve * sizeof(unsigned int))); + } else { + pve = lmprop.nitag / (1 << VTIME) * SEG0_S * NSBINS_S; + HANDLE_ERROR(cudaMalloc(&d_pview, (lmprop.nitag / (1 << VTIME)) * SEG0_S * NSBINS_S * + sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset( + d_pview, 0, (lmprop.nitag / (1 << VTIME)) * SEG0_S * NSBINS_S * sizeof(unsigned int))); + } + + if (lmprop.log <= LOGDEBUG) + printf("i> number of projection views (%d seconds): %d\n", (1 << VTIME), pve); + //--- + + //---sinogram + int tot_bins = NSANGLES_S * NSBINS_S * NRNG_S * NRNG_S; + unsigned int *d_sino; + if (nfrm == 1) { + HANDLE_ERROR(cudaMallocManaged(&d_sino, tot_bins * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_sino, 0, tot_bins * sizeof(unsigned int))); + } else if (nfrm > 1) { + // dynamic data consists of 8-bit integers compressed into unsigned 32-bit integer + HANDLE_ERROR(cudaMallocManaged(&d_sino, (nfrm + 1) / 2 * tot_bins / 2 * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_sino, 0, (nfrm + 1) / 2 * tot_bins / 2 * sizeof(unsigned int))); + } else { + printf("e> forget about zero frames histogramming!\n"); + return; + } + //--- + + // LUTs + int *d_c2s; + HANDLE_ERROR(cudaMallocManaged(&d_c2s, NCRS_S * NCRS_S * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_c2s, c2s, NCRS_S * NCRS_S * sizeof(int), cudaMemcpyHostToDevice)); + + //************************************************************************************** + gpu_hst(lmprop, d_rprmt, d_mass, d_pview, d_sino, d_c2s, r2s); + //************************************************************************************** + cudaDeviceSynchronize(); + + // head curve + HANDLE_ERROR( + cudaMemcpy(hout.phc, d_rprmt, lmprop.nitag * sizeof(unsigned int), cudaMemcpyDeviceToHost)); + + // mass centre + unsigned int *mass = (unsigned int *)malloc(lmprop.nitag * sizeof(unsigned int)); + cudaMemcpy(mass, d_mass, lmprop.nitag * sizeof(unsigned int), cudaMemcpyDeviceToHost); + for (int i = 0; i < lmprop.nitag; i++) { hout.mss[i] = mass[i] / float(hout.phc[i]); } + + // projection views + HANDLE_ERROR(cudaMemcpy(hout.pvs, d_pview, pve * sizeof(unsigned int), cudaMemcpyDeviceToHost)); + + // sino + HANDLE_ERROR( + cudaMemcpy(hout.psn, d_sino, tot_bins * sizeof(unsigned int), cudaMemcpyDeviceToHost)); + + cudaFree(d_rprmt); + cudaFree(d_sino); + cudaFree(d_mass); + cudaFree(d_pview); + cudaFree(d_c2s); + + return; +} diff --git a/niftypet/nipet/lm_sig/src/lmproc_sig.h b/niftypet/nipet/lm_sig/src/lmproc_sig.h new file mode 100644 index 00000000..daf0208c --- /dev/null +++ b/niftypet/nipet/lm_sig/src/lmproc_sig.h @@ -0,0 +1,38 @@ +#ifndef LMPROC_H +#define LMPROC_H + +#include + +#include "def.h" +#include "hdf5.h" +#include "hst_sig.h" +#include "scanner_0.h" + +typedef struct { + unsigned int *phc; // head curve prompts + float *mss; // centre of mass of radiodistribution + unsigned int *pvs; // projection views + void *psn; + unsigned long long psm; +} hstout; // output structure + +typedef struct { + int status; + uint8_t *bval; // byte values for a single event + hsize_t start[1]; // slab properties + hsize_t count[1]; + hsize_t stride[1]; + hid_t file; + hid_t dset; + hid_t dtype; + hid_t dspace; + int rank; + hid_t memspace; +} H5setup; // HDF5 setup structure + +H5setup initHDF5(H5setup h5set, char *fname, hsize_t bpe); + +void lmproc(hstout hout, LMprop lmprop, unsigned short *frames, int nfrm, short *r2s, int *c2s, + Cnst Cnt); + +#endif diff --git a/niftypet/nipet/src/scanner_1.h b/niftypet/nipet/src/scanner_1.h new file mode 100644 index 00000000..c07f9f94 --- /dev/null +++ b/niftypet/nipet/src/scanner_1.h @@ -0,0 +1,168 @@ +#include "def.h" +#include + +#ifndef AUX_H +#define AUX_H + +struct Cnst { + int A; // sino angles + int W; // sino bins for any angular index + int aw; // sino bins (active only) + + int NCRS; // number of crystals + int NCRSR; // reduced number of crystals by gaps + int NRNG; // number of axial rings + int D; // number of linear indexes along Michelogram diagonals + int Bt; // number of buckets transaxially + + int B; // number of buckets (total) + int Cbt; // number of crystals in bucket transaxially + int Cba; // number of crystals in bucket axially + + int NSN1; // number of sinos in span-1 + int NSN11; // in span-11 + int NSN64; // with no MRD limit + + char SPN; // span-1 (s=1) or span-11 (s=11, default) or SSRB (s=0) + int NSEG0; + + char RNG_STRT; // range of rings considered in the projector calculations (start and stop, + // default are 0-64) + char RNG_END; // it only works with span-1 + + int TGAP; // get the crystal gaps right in the sinogram, period and offset given + int OFFGAP; + + int NSCRS; // number of scatter crystals used in scatter estimation + int NSRNG; + int MRD; + + float ALPHA; // angle subtended by a crystal + float RE; // effective ring diameter + float AXR; // axial crystal dim + + float COSUPSMX; // cosine of max allowed scatter angle + float COSSTP; // cosine step + + int TOFBINN; + float TOFBINS; + float TOFBIND; + float ITOFBIND; + + char BTP; // 0: no bootstrapping, 1: no-parametric, 2: parametric (recommended) + float BTPRT; // ratio of bootstrapped/original events in the target sinogram (1.0 default) + + char DEVID; // device (GPU) ID. allows choosing the device on which to perform calculations + char VERBOSE; // different levels of verbose/logging like in Python's logging package + + // float ICOSSTP; + + // short SS_IMZ; + // short SS_IMY; + // short SS_IMX; + // short SS_VXZ; + // short SS_VXY; + + // short SSE_IMZ; + // short SSE_IMY; + // short SSE_IMX; + // short SSE_VXZ; + // short SSE_VXY; + + float ETHRLD; +}; + +#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__)) +void HandleError(cudaError_t err, const char *file, int line); + +extern LMprop lmprop; + +typedef struct { + short *li2s11; + char *NSinos; +} span11LUT; + +typedef struct { + int *zR; // sum of z indx + int *zM; // total mass for SEG0 +} mMass; // structure for motion centre of Mass + +struct LORcc { + short c0; + short c1; +}; + +struct LORaw { + short ai; + short wi; +}; + +// structure for 2D sino lookup tables (Siemens mMR) +struct txLUTs { + LORcc *s2cF; + int *c2sF; + int *cr2s; + LORcc *s2c; + LORcc *s2cr; + LORaw *aw2sn; + int *aw2ali; + short *crsr; + char *msino; + char *cij; + int naw; +}; + +// structure for 2D sino lookup tables (GE Signa) +struct txLUT_S { + int *c2s; +}; + +// structure for axial look up tables (Siemens mMR) +struct axialLUT { + int *li2rno; // linear indx to ring indx + int *li2sn; // linear michelogram index (along diagonals) to sino index + int *li2nos; // linear indx to no of sinos in span-11 + short *sn1_rno; + short *sn1_sn11; + short *sn1_ssrb; + char *sn1_sn11no; + int Nli2rno[2]; // array sizes + int Nli2sn[2]; + int Nli2nos; +}; + +// structure for axial look up tables (GE Signa) +struct axialLUT_S { + short *r2s; +}; + +void getMemUse(void); + +LORcc *get_sn2crs(void); + +txLUTs get_txlut(Cnst Cnt); + +// LORcc *get_sn2rng(void); + +// get the properties of LM and the chunks into which the LM is divided +void getLMinfo(char *flm); + +// modify the properties of LM in case of dynamic studies as the number of frames wont fit in the +// memory +void modifyLMinfo(int tstart, int tstop); + +// LUT for converstion from span-1 to span-11 +span11LUT span1_span11(const Cnst Cnt); + +//------------------------ +// mMR gaps +//------------------------ +void put_gaps(float *sino, float *sng, int *aw2ali, Cnst Cnt); + +void remove_gaps(float *sng, + float *sino, + int snno, + int * aw2ali, +//------------------------ + +#endif // AUX_H diff --git a/setup.cfg b/setup.cfg index c538bb46..fdf2215f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,6 +48,7 @@ setup_requires= ninja install_requires= cuvec>=2.8.0 + h5py miutil>=0.6.0 nibabel>=2.4.0 nimpa>=2.0.0