diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..b88fbb8 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include *.txt +recursive-include examples *.ipynb +recursive-include src *.py diff --git a/OceanLab/__init.py__ b/OceanLab/__init.py__ new file mode 100644 index 0000000..a1a2eae --- /dev/null +++ b/OceanLab/__init.py__ @@ -0,0 +1,6 @@ +from .dyn import zeta, vmode_amp, psi2uv, eqmodes, vmodes +from .eof import my_eof_interp, eoft, ceof, reconstruct_ceof +from .oa import scaloa, vectoa +from .utils import argdistnear, meaneddy + +_version_ = '0.1.0' diff --git a/src/OceanLab/DYN.py b/OceanLab/dyn.py similarity index 63% rename from src/OceanLab/DYN.py rename to OceanLab/dyn.py index 0b95795..761b994 100644 --- a/src/OceanLab/DYN.py +++ b/OceanLab/dyn.py @@ -2,21 +2,103 @@ import numpy as np import seawater as sw +# =========================================================== +# SURFACE AGEOSTROPHIC VELOCITY +# =========================================================== +def curvature(ssh_grad, dxt, dyt): + ''' Curvature of the sea surface height contours + ============================================================================== + INPUT: + SSH = sea surface height gradient [Y,X] + dxt = zonal distance between the grid cells in meter [Y,X] + dyt = meridional distance between the grid cells in meter [Y,X] + + OUTPUT: + k = curvature of the flow + ============================================================================== + ''' + + ssh_grad_y, ssh_grad_x = ssh_grad[0], ssh_grad[1] + + # Terms in the norminator + nominator_1st = (np.gradient(ssh_grad_y/dyt)[0]/dyt)*(ssh_grad_x/dxt)**2 + nominator_2nd = (np.gradient(ssh_grad_x/dxt)[1]/dxt)*(ssh_grad_y/dyt)**2 + nominator_3rd = 2*(np.gradient(ssh_grad_y/dyt)[1]/dxt)*(ssh_grad_x/dxt)*(ssh_grad_y/dyt) + + # Terms in the denominator + denominator_1st = (ssh_grad_x/dxt)**2 + denominator_2nd = (ssh_grad_y/dyt)**2 + denominator = pow(denominator_1st + denominator_2nd, 3/2) + + k = (-1*nominator_1st - 1*nominator_2nd + 2*nominator_3rd)/denominator + + return k + +def ageostrophic_vel(LON, LAT, SSH): + ''' Surface ageostrophic velocity from gradient wind balance through the + combination of the surface geostrophic velocity and curvature of the flow + ============================================================================== + INPUT: + LON = longitudes [Y,X] + LAT = latitude [Y,X] + SSH = sea surface height in meter [Y,X] + + OUTPUT: + u_ag_vel = zonal component of the surface ageostrophic velocity + v_ag_vel = meridional component of the surface ageostrophic velocity + ============================================================================== + ''' + g = 9.8 + f = sw.f(LAT) + [trash,dlonx] = np.gradient(LON) + dx = dlonx*np.cos(LAT*np.pi/180)*111195. + [dlaty,trash] = np.gradient(LAT) + dy = dlaty*111195. + SSH_gradient = np.gradient(SSH) + k = curvature(SSH_gradient, dx, dy) + + ## Estimating the surface geostrophic velocity from SSH + u_geo_vel = (-g/f)*(SSH_gradient[0]/dy) + v_geo_vel = (g/f)*(SSH_gradient[1]/dx) + ## Reconstructing the surface velocity from the combinantion of the geostrophic velocity and curvature of the flow + u_total = (2*u_geo_vel)/(1 + np.sqrt(1 + 4*k*np.sqrt(u_geo_vel**2 + v_geo_vel**2)/f)) + v_total = (2*v_geo_vel)/(1 + np.sqrt(1 + 4*k*np.sqrt(u_geo_vel**2 + v_geo_vel**2)/f)) + ## Computing the surface ageostrophic velocity as the difference between the total and geostrophic fields + u_ag_vel = u_total - u_geo_vel + v_ag_vel = v_total - v_geo_vel + + return u_ag_vel, v_ag_vel + +# =========================================================== +# DYNAMICAL MODES AMPLITUDE +# =========================================================== def vmode_amp(A,vi): ''' This function makes the projection of every vertical mode to - timeseries or section matrix to obtain its amplitude. It will be used to - data reconstruction. + timeseries or section matrix to obtain its amplitude. + It will be used to data reconstruction. Operation: ((A'*A)^-1)*(A'*vi) + ============================================================= + + INPUT: + A = + vi = + OUTPUT: + AMP = + + ============================================================== ''' - return np.dot(np.linalg.inv(np.dot(A.T,A)),np.dot(A.T,vi)) + return np.dot(np.linalg.inv(np.dot(A.T,A)),np.dot(A.T,vi)) +# =========================================================== +# RELATIVE VORTICITY +# =========================================================== def zeta(x,y,U,V): ''' - ZETA k-component of rotational by velocity field + ZETA k-component of rotational by velocity field Returns the scalar field of vorticity from U and V velocity components. X and Y are longitude and latitude matrices, respectively, as U and V. All matrices have @@ -39,25 +121,21 @@ def zeta(x,y,U,V): (JM = 0) o ---------- o ---------- o ---------- o --- ... (IM = 0) (IM = 1) (IM = 2) (IM = 3) - ----------------------------------------------------------------- - - USAGE: u,v = psi2uv(x,y,psi) - - INPUT: - x = decimal degrees (+ve E, -ve W) [-180..+180] - y = decimal degrees (+ve N, -ve S) [- 90.. +90] - u = velocity zonal component [m s^-1] - v = velocity meridional component [m s^-1] + ================================================================ + USAGE: ZETA = zeta(x,y,u,v) - OUTPUT: - ZETA = Relative vorticity field [s^-1] - - AUTHOR: - Iury T. Simoes-Sousa and Wandrey Watanabe - 29 Jun 2016 - Laboratório de Dinâmica Oceânica - IOUSP - ====================================================================== + INPUT: + x = decimal degrees (+ve E, -ve W) [-180..+180] + y = decimal degrees (+ve N, -ve S) [- 90.. +90] + u = velocity zonal component [m s^-1] + v = velocity meridional component [m s^-1] + OUTPUT: + ZETA = Relative vorticity field [s^-1] + + ================================================================ ''' + #função para cálculo de ângulos angcalc = lambda dy,dx: np.math.atan2(dy,dx) @@ -116,16 +194,21 @@ def zeta(x,y,U,V): return ZETA +# =========================================================== +# U AND V FROM STREAMFUNCTION +# =========================================================== def psi2uv(x,y,psi): ''' - PSI2UV Velocity components from streamfunction - Returns the velocity components U and V - from streamfunction PSI. X and Y are longitude and latitude matrices, - respectively, as PSI. All matrices have same dimension. + PSI2UV Velocity components from streamfunction - --> !!! IMPORTANT !!! <----------------------------------------- - The grid indexes IM and JM must have origin on the left-inferior - corner, increasing to right and to up, respectively. + Returns the velocity components U and V + from streamfunction PSI. X and Y are longitude and latitude + matrices, respectively, as PSI. + All matrices have same dimension. + + --> !!! IMPORTANT !!! <----------------------------------- + The grid indexes IM and JM must have origin on the + lower-left corner, increasing to right and to up. GRID : : : : @@ -139,23 +222,20 @@ def psi2uv(x,y,psi): (JM = 0) o ---------- o ---------- o ---------- o --- ... (IM = 0) (IM = 1) (IM = 2) (IM = 3) - ----------------------------------------------------------------- + ============================================================== - USAGE: u,v = psi2uv(x,y,psi) + USAGE: u,v = psi2uv(x,y,psi) - INPUT: - x = decimal degrees (+ve E, -ve W) [-180..+180] - y = decimal degrees (+ve N, -ve S) [- 90.. +90] - psi = streamfunction [m^2 s^-1] + INPUT: + x = decimal degrees (+ve E, -ve W) [-180..+180] + y = decimal degrees (+ve N, -ve S) [- 90.. +90] + psi = streamfunction [m^2 s^-1] - OUTPUT: - u = velocity zonal component [m s^-1] - v = velocity meridional component [m s^-1] + OUTPUT: + u = velocity zonal component [m s^-1] + v = velocity meridional component [m s^-1] - AUTHOR: - Iury T. Simoes-Sousa and Wandrey Watanabe - 12 May 2016 - Laboratório de Dinâmica Oceânica - IOUSP - ====================================================================== + ========================================================== ''' #função para cálculo de ângulos @@ -210,45 +290,34 @@ def psi2uv(x,y,psi): return U,V - +# =========================================================== +# EQUATORIAL MODES +# =========================================================== def eqmodes(N2,z,nm,lat,pmodes=False): ''' This function computes the equatorial velocity modes - ======================================================== - - Input: - - N2 - Brunt-Vaisala frequency data array - - z - Depth data array (equaly spaced) - - lat - Latitude scalar - - nm - Number of modes to be computed - - pmodes - If the return of pressure modes is required. - Default is False - - ======================================================== - - Output: - - Si - Equatorial modes matrix with MxN dimension being: - M = z array size - N = nm - - Rdi - Deformation Radii array - - Fi - Pressure modes matrix with MxN dimension being: - M = z array size - N = nm - - Returned only if input pmodes=True - - made by Iury T. Simoes-Sousa, Hélio Almeida and Wandrey Watanabe - Laboratório de Dinâmica Oceânica - Universidade de São Paulo - 2016 + ============================================================ + + INPUT: + N2 = Brunt-Vaisala frequency data array + z = Depth data array (equaly spaced) + lat = Latitude scalar + nm = Number of modes to be computed + pmodes = If the return of pressure modes is required. + (Default is False) + + OUTPUT: + Si = Equatorial modes matrix with MxN dimension being: + M = z array size + N = nm + Rdi = Deformation Radii array + Fi = Pressure modes matrix with MxN dimension being: + M = z array size + N = nm + (Returned only if input pmodes=True) + + ============================================================ ''' #nm will be the number of baroclinic modes @@ -327,7 +396,7 @@ def eqmodes(N2,z,nm,lat,pmodes=False): # F_j(z)=-g.he_j d/dzS_j # #### - if pmodes==True: + if pmodes: Fi=np.zeros(Si.shape) for i in np.arange(nm): Fi[1:-1,i] =\ @@ -352,8 +421,22 @@ def eqmodes(N2,z,nm,lat,pmodes=False): return Si,radii - +# =========================================================== +# DYNAMICAL MODES +# =========================================================== def vmodes(N2,z,nm,lat,ubdy='N',lbdy='N'): + + """ + + ======================================================== + INPUT: + + OUTPUT: + + AUTHOR: + ======================================================== + """ + f0 = sw.f(lat) dz = np.abs(z[1] - z[0]) @@ -417,4 +500,4 @@ def vmodes(N2,z,nm,lat,ubdy='N',lbdy='N'): radii[0] = np.sqrt(9.81*H)/np.abs(f0)/1000 radii[1:] = 1/np.sqrt(ei[1:])/1000 - return fi, radii \ No newline at end of file + return fi, radii diff --git a/OceanLab/eof.py b/OceanLab/eof.py new file mode 100644 index 0000000..2f1e922 --- /dev/null +++ b/OceanLab/eof.py @@ -0,0 +1,281 @@ +import numpy as np +import scipy.linalg as la +from dask import delayed +from scipy.signal import hilbert +import xarray as xr +from dask.distributed import Client, LocalCluster + +# functions +#========================================= +# COMPUTE EOFS +#========================================= +def eoft(trmat,nm=None): + ''' + evals_perc,evecs_norm,amp = eoft(trmat) + + Computes eof in time (or depth) for general cases + normally trmat is a matrix containing each time series + (or vertical profiles) as a row. + That is, for N stations having m data points, + trmat is a N by m matrix. + ''' + + #if is masked the data + try: + Trmat = trmat.data + Trmat[trmat.mask] = np.nan + trmat = Trmat.copy() + except: + None + + #% demeans trmat prior to doing eof + trmat = (trmat.T-np.nanmean(trmat,axis=1)).T + #% computes zero-lag cross-covariance matrix + mcov = np.dot(trmat,trmat.T)/(trmat.shape[1]-1) + #% computes eigenvalues and eigenvectors + evals,evecs = np.linalg.eig(mcov) + #find the descending order of eigenvalues + ind = np.argsort(evals)[::-1] + #% normalize eigenvectors + evecs_norm = evecs[:,ind]/np.linalg.norm(evecs[:,ind],axis=0) + #% sort eigenvalues and computes percent variance explained by each mode + evals_perc = evals[ind]/np.sum(evals) + #% computes amplitude functions + amp = np.dot(evecs_norm.T,trmat) + + #if was chosen a number of eigenvectors + if nm is not None: + evecs_norm = evecs_norm[:,:nm] + evals_perc = evals_perc[:nm] + amp = amp[:nm,:] + + return evals_perc,evecs_norm,amp + +#========================================= +# FILL GAPS WITH EOFS +#========================================= +def my_eof_interp(M,nmodes,errmin=1e-15,repmax=None): + + """ + vi = my_eof_interp(M,nmodes) + + This function try to fill gappy data using its EOFs + + INPUT: + -> M: the data Matrix (generaly the diferent stations are on the columns); + -> nmodes: the number of statiscally significant EOF modes (to be used for the series reconstruction) + + The method -- based on Beckers & Rixen (2003, JAOTech) + + The algorithm proposed by the authors is as follows. (see Eqs. (13) and (14) from Beckers & Rixen) + (I) Put zero in the GAPPY positions ("unbiased INITIAL GUESS, by assuming that the removed mean was unbiased). + (II) Then you can have a first estimative of the EOFs. + (III) You can reconstruct the series using the statiscally significant modes (e.g.: determined using Monte Carlo simulations as in Preisendorfer [1988] -- OBS: the authors propose another method using cross validation during the iterative process) + (IV) Substitute the reconstructed series ONLY in the GAPPY positions in your original matrix. + (V) Run steps (II), (III) and (IV) iteractively till its convergence (i.e. compare the reconstructed series at time t+1 with that at time t). + + %%%%% OBS: my_eof_interp doesn't work for filling gaps simultaneously in all depths. + """ + + Mmean = np.nanmean(M,axis=1) + + M = M.T - np.nanmean(M,axis=1) + M = M.T + + f = np.isnan(M) + M[f] = 0 #%% initial guess in gappy positions + + rep = 0 + while True: + print(rep+1) + + + Ud,Dd,Vd = np.linalg.svd(M,'econ') + Dd = np.diagflat(Dd) + + #% compute the explained variance (total variance is the sum of the Dd elements squared -total energy-) + evd = np.diag(Dd)**2 #%% remember that the singular values (Dd) are the sqrt of + evd = Dd/np.sum(Dd) #%% the eigenvalue of the cov. matrix. + + + #%% truncated EOF reconstruction + Ud = Ud[:,0:nmodes] + Vd = Vd[:,0:nmodes] + Dd = Dd[0:nmodes,0:nmodes] + #Dd = Dd[0:nmodes] + + #% reconstructing the series -- truncated reconstruction (SVD: Mrec = Ud * Dd *Vd') + Xa = np.dot(np.dot(Ud,Dd),Vd.T) + + Mold = M.copy() #% saving for compute the error + + M[f] = Xa[f] + + #% testing the error + if rep>=1: + err = np.nanmean(np.abs(M - Mold))/(np.nanmax(M)-np.nanmin(M)) + print(err) + if err < errmin: + break + if repmax is not None: + if rep>=repmax: + break + rep += 1 + + #%% Saving the last the series with the EOF reconstructed series in the gappy positions; + vi = (M.T+Mmean).T + + return vi + +#========================================= +# PERFORM COMPLEX EOF +#========================================= +def ceof(lon, lat, data, nkp = 10, parallel = True): + ''' Complex (Hilbert) EOF to detect propagating features: waves, meanders, etc. + Note: the mean field in each coordinate is subtracted within the function. + Do not subtract the time-mean field before inputing. + NaN values are removed in the algorithm. + The user can input the data as it is. + + First written in MATLAB and found in Prof. Daniel J. Vimont webpage + (https://www.aos.wisc.edu/~dvimont/matlab/Stat_Tools/complex_eof.html) + ============================================================================== + INPUT: + lon = longitudes (array) + lat = latitude (array) + data = original data set [time, lat, lon] + nkp = number of modes to return (default = 10) + parallel = create a standard client kernel for parallel computing + [switch parallel to False, in case you created your own client] + + OUTPUT: + The variables below return inside a DataArray. + per = percent variance explained (real eigenvalues) + modes = first nkp complex loadings or eigenvectors [lat, lon, nkp] + SpAmp = spatial amplitude [lat, lon, nkp] + SpPhase = spatial phase [lat, lon, nkp] + pcs = first nkp complex principal components or amplitudes [time, nkp] + TAmp = temporal amplitude [time, nkp] + TPhase = temporal phase [time, nkp] + ============================================================================== + ''' + # Configure client for parallel computing + if parallel: + cluster = LocalCluster() + client = Client(cluster) + + # Organizing the data as time vs space + data_ceof = _org_data_ceof(lon, lat, data) + # We need to remove the mean field (i.e., the trend) in each coordinate to + # evaluate the variability + data_ceof = data_ceof - data_ceof.mean('time') + + # The variables below are useful later + load_real = np.zeros([data_ceof.shape[1], nkp])*np.nan + load_imag = np.zeros([data_ceof.shape[1], nkp])*np.nan + # It is necessary to remove the nan values of the matrix to solve the eigenvalue problem + nan_values = np.isnan(data_ceof[0,:]) # We can just look at each coordinate along a single time + data_ceof = data_ceof[:,~nan_values] # Then, we remove all these coordinates in all of the occurences + + ntim, npt = data_ceof.shape + + # Hilbert transform: input sequence x and returns a complex result of the same length + print('1: Performing Hilbert transform') + data_hilbert = hilbert(data_ceof) + # Compute the covariance matrix in the Hilbert transform + print('2: Computing covariance matrix') + c = delayed(np.dot)(data_hilbert.conjugate().T, data_hilbert).compute()/ntim + print('3: Solving the eigenvalue problem') + lamda, loadings = delayed(la.eig)(c).compute() # lamda: eigenvalue, loadings: eigenvectors + + l = lamda.conjugate().T + k = np.argsort(l) + lamda, loadings = np.flip(l[k]), np.fliplr(loadings[:,k]) + loadings = loadings[:,:nkp] + # In case there were nan values in the orginal data, we need to perform the approach below: + load_real[~nan_values,:] = loadings.real.copy() + load_imag[~nan_values,:] = loadings.imag.copy() + load = load_real + 1j*load_imag + modes = load.reshape((len(lat),len(lon), nkp)) + + per = lamda.real*100/np.sum(lamda.real) + per = per[:nkp].copy() + pcs = np.dot(data_hilbert,loadings) + + sp_amp, sp_phase, t_amp, t_phase = _amplitude_phase(load, pcs) + sp_amp = sp_amp.reshape((len(lat),len(lon), nkp)) + sp_phase = sp_phase.reshape((len(lat),len(lon), nkp)) + + print('Done! \U0001F600') + + dims = ["lat", "lon", "nkp", "time"] + ds = xr.Dataset({"per":(dims[2], per),"modes":(dims[:-1], modes),"SpAmp":(dims[:-1], sp_amp), + "SpPhase":(dims[:-1], sp_phase),"pcs":(dims[-2:][::-1], pcs),"TAmp":(dims[-2:][::-1], t_amp), + "TPhase":(dims[-2:][::-1], t_phase)}, + coords={"lat":(dims[0], lat), "lon":(dims[1], lon), "nkp":(dims[2], np.arange(nkp)), + "time":(dims[3], np.arange(len(data_ceof)))}) + + return ds + +def _org_data_ceof(lon, lat, data): + dims = ["time", "lat", "lon"] + datxarray = xr.Dataset({"data_latlon": (dims, data)}, + coords={'lat':(dims[1], lat), 'lon':(dims[2], lon)}) + data_ceof = datxarray.stack(lat_lon=("lat", "lon")).data_latlon + return data_ceof + +def _amplitude_phase(evecs, amp): + ''' Complex (Hilbert) EOF + First written in MATLAB and found in the webpage below + (https://www.jsg.utexas.edu/fu/files/GEO391-W11-CEOF.pdf) + + =========================================================================== + INPUT: + evecs = first nkp complex loadings or eigenvectors [lat, lon, nkp] + amp = first nkp complex principal components or amplitudes [time, nkp] + + OUTPUT: + SpAmp = spatial amplitude [lat, lon, nkp] + SpPhase = spatial phase [lat, lon, nkp] + TAmp = temporal amplitude [time, nkp] + TPhase = temporal phase [time, nkp] + =========================================================================== + ''' + # Spatial amplitude + SpAmp = pow(np.multiply(evecs, np.conj(evecs)),0.5) + theta = np.arctan2(evecs.imag, evecs.real) + # Spatial phase + SpPhase = np.divide(np.multiply(theta, 180), np.pi) + + # Temporal amplitude + TAmp = pow(np.multiply(amp, np.conj(amp)), 0.5) + # Temporal phase + phit = np.arctan2(amp.imag, amp.real) + TPhase = np.divide(np.multiply(phit, 180), np.pi) + + return SpAmp, SpPhase, TAmp, TPhase + +def reconstruct_ceof(DataMean, amp, modes, n, day): + ''' Reconstrucion of daily CEOF modes individually similar to Majumder et al. (2019). + Here, the mean field in each coordinate is added within the function. + Besides, each mode is reconstructed individually, instead of computing the sum of the + reconstruction of different modes. + + ========================================================================================= + INPUT: + DataMean = time-mean of the original data [lat, lon] (e.g., np.nanmean(data,axis=0)) + amp = principal components or amplitudes [time, nkp] + mode = eigenvectors or loadings [lat, lon, nkp] + n = mode of variability to be reconstructed + day = day to be reconstructed. + + OUTPUT: + RecCEOF = reconstruction of a CEOF mode on a chosen day [lat, lon] + ========================================================================================= + ''' + + # Majumder et al (2019) compute the reconstructed CEOF field as the real part of the multiplication between + # the coefficient of expansion (i.e., amplitude) and the complex conjugate of the loading (i.e., mode) + Rec_ceof = amp[day,n]*np.conj(modes[:,:,n]) + RecCEOF = Rec_ceof.real + DataMean + return RecCEOF diff --git a/src/OceanLab/OA.py b/OceanLab/oa.py similarity index 93% rename from src/OceanLab/OA.py rename to OceanLab/oa.py index 3a47dc3..2360eb6 100644 --- a/src/OceanLab/OA.py +++ b/OceanLab/oa.py @@ -34,12 +34,7 @@ def vectoa(Xg,Yg,X,Y,U,V,corrlenx,corrleny,err,b=0): PSI - Streamfuction field matrix with MxN dimension. The dimension of the output is always the same of XC & YC - ====================================================================== - - PYTHON VERSION by: - Iury Sousa and Hélio Almeida - 30 May 2016 - Laboratório de Dinâmica Oceânica - IOUSP - ======================================================================''' + ''' # making sure that the input variables aren't changed xc,yc,x,y,u,v=Xg.copy(),Yg.copy(),X.copy(),Y.copy(),U.copy(),V.copy() @@ -99,12 +94,12 @@ def vectoa(Xg,Yg,X,Y,U,V,corrlenx,corrleny,err,b=0): tc = np.array(tc) tc.shape = ppc[0].shape d2=((np.tile(xc,(n,1)).T-np.tile(x,(nv,1)))**2+(np.tile(yc,(n,1)).T-np.tile(y,(nv,1)))**2) - R=np.exp(-lambd*d2)+bmo; + R=np.exp(-lambd*d2)+bmo P=np.zeros((nv,2*n)) # streamfunction-velocity covariance - P[:,0:n]=np.sin(tc)*np.sqrt(d2)*R; - P[:,n:2*n]=-np.cos(tc)*np.sqrt(d2)*R; + P[:,0:n]=np.sin(tc)*np.sqrt(d2)*R + P[:,n:2*n]=-np.cos(tc)*np.sqrt(d2)*R PSI=np.dot(P,np.linalg.solve(A,uv)) # solvi the linear system PSI=PSI.reshape(nv2,nv1).T @@ -168,7 +163,7 @@ def scaloa(xc, yc, x, y, t=[], corrlenx=None,corrleny=None, err=None, zc=None): # Gauss-Markov to get the weights that minimize the variance (OI). tp = None ep = 1 - np.sum(C.T * np.linalg.solve(A, C.T), axis=0) / (1 - err) - if any(t)==True: ##### was t!=None: + if any(t): ##### was t!=None: t = np.reshape(t, (n, 1)) tp = np.dot(C, np.linalg.solve(A, t)) #if 0: # NOTE: `scaloa2.m` @@ -179,7 +174,7 @@ def scaloa(xc, yc, x, y, t=[], corrlenx=None,corrleny=None, err=None, zc=None): # tp = tp + mD * np.ones(tp.shape) return tp, ep - if any(t)==False: ##### was t==None: + if not any(t): ##### was t==None: print("Computing just the interpolation errors.") #Normalized mean error. Taking the squared root you can get the #interpolation error in percentage. diff --git a/OceanLab/utils.py b/OceanLab/utils.py new file mode 100644 index 0000000..6b8c35b --- /dev/null +++ b/OceanLab/utils.py @@ -0,0 +1,161 @@ +import numpy as np +import scipy.signal as sg +import xarray as xr + +from dask.distributed import Client + +##### User functions +#============================================================================= +# NEAREST DISTANCE +#============================================================================= +def argdistnear(x,y,xi,yi): + ''' + This function finds the index to nearest points in (xi,yi) from (x,y) + ====================================================================== + + USAGE: + x,y = [5,1,10],[2,6,3] + xi,yi = np.linspace(0,19,20),np.linspace(-5,30,20) + ind = argdistnear(x,y,xi,yi) + + INPUT: + (x,y) = points [list] + (xi,yi) = series to search nearest point [list] + + OUTPUT: + ind = index of the nearest points + ====================================================================== + ''' + + idxs = [np.argmin(np.sqrt((xi-xx)**2 + (yi-yy)**2)) for xx,yy in zip(x,y)] + idxs = np.array(idxs) + return idxs +#============================================================================= + +#============================================================================= +# LOW PASS FILTER +#============================================================================= +def meaneddy(prop,days=60,ndim=1,DataArray=False,timedim=None): + + """ + Apply a low-pass filter (scipy.signal.butter) to 'prop' and obtain the + mean and eddy components. + ========================================================================== + + USAGE [1]: + # np.array() one year, 17 lat x 13 lon domain + Velocity = np.random.randn(365,17,13) + Filtered, Residual = meaneddy(Velocity, days=10, ndim=3, + DataArray=False,timedim=None) + + USAGE [2]: + # xr.DataArray() one year, 17 lat x 13 lon domain + Velocity = xr.DataArray(data=np.random.randn(365,17,13), + dims=["time","lat","lon"], + coords=dict(time=(["time"],range(0,365)), + lat=(["lat"],np.arange(-4,4.5,0.5)), + lon=(["lon"],np.arange(1,7.5,0.5)))) + Filtered, Residual = meaneddy(Velocity, days=10, + DataArray=True,timedim=["time"]) + + INPUT: + prop = 1, 2 or 3D array to be filtered + days = number of days to set up the filter + ndim = number of dimensions of the data + [only used for DataArray=False, max:3] + DataArray = True if prop is in xr.DataArray format + dim = name of time dimension to filter + [only used for DataArray=True] + + OUTPUT: + m_prop = mean (filtered) part of the property + p_prop = prime part of the property, essentially prop - m_prop + ========================================================================== + """ + + # creating filter + def timefilter(prop,filtdays=60): + filt_b,filt_a = sg.butter(4,1./filtdays) + return sg.filtfilt(filt_b,filt_a,prop) + + if DataArray: + m_prop = xr.apply_ufunc( + timefilter, # first the function + prop,# now arguments in the order expected by 'butter_filt' + input_core_dims=[timedim], # list with one entry per arg + output_core_dims=[timedim], # returned data + kwargs={'filtdays':days}, + vectorize=True, # loop over non-core dims + dask='vectorized') + + p_prop = prop - m_prop + + elif ndim ==3: + ti,lt,ln = prop.shape + prop = prop.reshape(ti,lt*ln) + + m_prop = [] + + for tot in prop.T: + # filtered series (mean) + m_prop.append(timefilter(tot,filtdays=days)) + + m_prop = np.array(m_prop) + m_prop = m_prop.T + + p_prop = prop - m_prop + + m_prop = m_prop.reshape(ti,lt,ln) + p_prop = p_prop.reshape(ti,lt,ln) + + elif ndim ==2: + + ti,lt = prop.shape + + m_prop = [] + + for tot in prop.T: + # filtered series (mean) + m_prop.append(timefilter(tot,filtdays=days)) + + m_prop = np.array(m_prop) + m_prop = m_prop.T + + p_prop = prop - m_prop + + m_prop = m_prop.reshape(ti,lt) + p_prop = p_prop.reshape(ti,lt) + + elif ndim ==1: + m_prop = timefilter(prop,filtdays=days) + p_prop = prop - m_prop + + return m_prop,p_prop +#============================================================================= + +##### Functions for relative imports +# ============================================================================= +# KERNEL FOR PARALLEL COMPUTING +# ============================================================================= +def _parallel_client(cpu_params=dict(tpw=2,nw=4,ml=7.5)): + """ + Create client kernel for parallel computing + ==================================================== + INPUT: + -> cpu_params: dict containing floats with keys + -> tpw: threads_per_worker + -> nw: n_workers + -> ml: memory_limit per worker [GB] + OUTPUT: + -> client: configuration of parallel computing + ==================================================== + """ + + client = Client(threads_per_worker=cpu_params['tpw'], + n_workers=cpu_params['nw'], + memory_limit=str(cpu_params['ml'])+'GB') + return client +#============================================================================= + + + diff --git a/README.md b/README.md index 0d68788..4d95112 100644 --- a/README.md +++ b/README.md @@ -2,25 +2,15 @@ Package of Python scripts for Oceanography (Python +3.6) -## Synopsis - -This lib was made by **Iury T. Simoes-Sousa** (). - -Now this package have this subpackages below: - -- **OA** -- **DYN** -- **EOF** - ## Code Example -Check `examples` folder in our [github repository](github.com/iuryt/OceanLab). +Check `examples` folder in our [github repository](../../tree/master/examples). ## Installation `pip install OceanLab` -## Documentation +## Modules - **OA** - *vectoa()*: Objective analysis for vectorial fields; @@ -34,7 +24,13 @@ Check `examples` folder in our [github repository](github.com/iuryt/OceanLab). - **EOF** - *eoft()*: Calculates the Empirical Orthogonal Functions; - *my_eof_interp()*: Fillgaps on matrix based on EOFs (translated from Cesar Rocha Matlab version); + - *ceof()*: Performs the Complex (or Hilbert) Empirical Orthogonal Functions decomposition; + - *reconstruct_ceof()*: Reconstructs the CEOF modes individually; +- **UTILS** + - *argdistnear()*: Searchs the position of the closest points in an array to a reference point; + - *meaneddy()*: Performs an eddy-mean decomposition with a low-pass filter; + ## Contributors -Everyone can contribute to this code. Some of functions was based on Filipe Fernandes or Cesar Rocha functions and some of them was created with help of Dante C. Napolitano, Hélio M. R. Almeida and Wandrey Watanabe at Ocean Dynamics Lab of University of São Paulo (USP). \ No newline at end of file +Everyone can contribute to this code. Some of functions were based on Filipe Fernandes or Cesar Rocha functions and some of them were created with help of Dante C. Napolitano, Hélio M. R. Almeida, Wandrey Watanabe, and Felipe Vilela-Silva at Ocean Dynamics Lab of University of São Paulo (USP). diff --git a/examples/DYN_EOF.ipynb b/examples/DYN_EOF.ipynb index 7feb8d1..6f04e4f 100644 --- a/examples/DYN_EOF.ipynb +++ b/examples/DYN_EOF.ipynb @@ -284,7 +284,7 @@ } ], "source": [ - "n = 100\n", + "q = 100\n", "xi,yi = np.meshgrid(np.linspace(0,15,q),np.linspace(0,15,q))\n", "\n", "vorti = DYN.zeta(xi,yi,lambdify((x,y),u)(xi,yi),lambdify((x,y),v)(xi,yi))\n", diff --git a/requirements.txt b/requirements.txt index 9d87074..948d657 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,6 @@ numpy >= 1.8.2 -seawater >= 3.3.1 \ No newline at end of file +seawater >= 3.3.1 +scipy >= 1.6.3 +xarray >= 0.18.2 +dask >= 2021.06.0 +dask[distributed] >= 2021.06.0 diff --git a/setup.py b/setup.py index de260cc..8a31867 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,25 @@ from setuptools import setup +def read(file): + return open(file, 'r').read() -with open('README.md','r') as fh: - long_description = fh.read() +LONG_DESCRIPTION = read('README.md') +LICENSE = read('LICENSE.txt') setup( name='OceanLab', - version='0.0.1', + version='0.1.0', + packages=['OceanLab'], + include_package_data=True, description='Python functions for Physical Oceanography', - long_description=long_description, + long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', + download_url = 'https://pypi.python.org/pypi/OceanLab', url='https://github.com/iuryt/OceanLab', author='Iury T. Simoes-Sousa', author_email='simoesiury@gmail.com', - license='MIT', + license=LICENSE, py_modules=['OA','EOF','DYN'], - package_dir={'': 'src'}, classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", @@ -26,5 +30,9 @@ install_requires = [ 'seawater ~= 3.3', 'numpy ~= 1.18', + 'scipy ~= 1.6', + 'xarray ~= 0.18', + 'dask ~= 2021.06', + 'dask[distributed] ~= 2021.06' ], ) diff --git a/src/OceanLab/EOF.py b/src/OceanLab/EOF.py deleted file mode 100644 index 60a0244..0000000 --- a/src/OceanLab/EOF.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np - -def eoft(trmat,nm=None): - ''' - evals_perc,evecs_norm,amp = eoft(trmat) - - Computes eof in time (or depth) for general cases - normally trmat is a matrix containing each time series - (or vertical profiles) as a row. - That is, for N stations having m data points, - trmat is a N by m matrix. - ''' - - #if is masked the data - try: - Trmat = trmat.data - Trmat[trmat.mask] = np.nan - trmat = Trmat.copy() - except: - None - - #% demeans trmat prior to doing eof - trmat = (trmat.T-np.nanmean(trmat,axis=1)).T - #% computes zero-lag cross-covariance matrix - mcov = np.dot(trmat,trmat.T)/(trmat.shape[1]-1) - #% computes eigenvalues and eigenvectors - evals,evecs = np.linalg.eig(mcov) - #find the descending order of eigenvalues - ind = np.argsort(evals)[::-1] - #% normalize eigenvectors - evecs_norm = evecs[:,ind]/np.linalg.norm(evecs[:,ind],axis=0) - #% sort eigenvalues and computes percent variance explained by each mode - evals_perc = evals[ind]/np.sum(evals) - #% computes amplitude functions - amp = np.dot(evecs_norm.T,trmat) - - #if was chosen a number of eigenvectors - if nm!=None: - evecs_norm = evecs_norm[:,:nm] - evals_perc = evals_perc[:nm] - amp = amp[:nm,:] - - return evals_perc,evecs_norm,amp - - - -def my_eof_interp(M,nmodes,errmin=1e-15,repmax=None): - #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% vi = my_eof_interp(M,nmodes) - #% This function try to fill gappy data using its EOFs - #% - #% M is the data Matrix (generaly the diferent stations - #% are on the columns); - #% - #% nmodes is the number of statiscally significant EOF modes (to be used for the series reconstruction) - #% - #%% The method -- based on Beckers & Rixen (2003, JAOTech) - #% - #% The algorithm proposed by the authors is as follows. (see Eqs. (13) and (14) from Beckers & Rixen) - #% - #% (I) Put zero in the GAPPY positions ("unbiased INITIAL GUESS, by assuming that the removed mean was unbiased). - #% - #% (II) Then you can have a first estimative of the EOFs. - #% - #% (III) You can reconstruct the series using the statiscally significant modes (e.g.: determined using Monte Carlo - #% simulations as in Preisendorfer [1988] -- OBS: the authors propose another method using cross validation - #% during the iterative process) - #% - #% (IV) Substitute the reconstructed series ONLY in the GAPPY positions in your original matrix. - #% - #% (V) Run steps (II), (III) and (IV) iteractively till its convergence (i.e. compare the reconstructed series at time - #% t+1 with that at time t). - #% - #% OBS: It doesn't work for filling gaps simultaneously in all depths. - - Mmean = np.nanmean(M,axis=1) - - M = M.T - np.nanmean(M,axis=1) - M = M.T - - f = np.isnan(M) - M[f] = 0 #%% initial guess in gappy positions - - rep = 0 - while True: - print(rep+1) - - - Ud,Dd,Vd = np.linalg.svd(M,'econ') - Dd = np.diagflat(Dd) - - #% compute the explained variance (total variance is the sum of the Dd elements squared -total energy-) - evd = np.diag(Dd)**2 #%% remember that the singular values (Dd) are the sqrt of - evd = Dd/np.sum(Dd) #%% the eigenvalue of the cov. matrix. - - - #%% truncated EOF reconstruction - Ud = Ud[:,0:nmodes] - Vd = Vd[:,0:nmodes] - Dd = Dd[0:nmodes,0:nmodes] - #Dd = Dd[0:nmodes] - - #% reconstructing the series -- truncated reconstruction (SVD: Mrec = Ud * Dd *Vd') - Xa = np.dot(np.dot(Ud,Dd),Vd.T) - - Mold = M.copy() #% saving for compute the error - - M[f] = Xa[f] - - #% testing the error - if rep>=1: - err = np.nanmean(np.abs(M - Mold))/(np.nanmax(M)-np.nanmin(M)) - print(err) - if err < errmin: - break - if repmax!=None: - if rep>=repmax: - break - rep += 1 - - #%% Saving the last the series with the EOF reconstructed series in the gappy positions; - vi = (M.T+Mmean).T - - return vi