"""
Python Glacier Evolution Model (PyGEM)
copyright © 2018 David Rounce <drounce@cmu.edu
Distrubted under the MIT lisence
class of climate data and functions associated with manipulating the dataset to be in the proper format
"""
import os
import numpy as np
# External libraries
import pandas as pd
import xarray as xr
from pygem.setup.config import ConfigManager
# instantiate ConfigManager
config_manager = ConfigManager()
# read the config
pygem_prms = config_manager.read_config()
[docs]
class GCM:
"""
Global climate model data properties and functions used to automatically retrieve data.
Attributes
----------
name : str
name of climate dataset.
sim_climate_scenario : str
rcp or ssp sim_climate_scenario (example: 'rcp26' or 'ssp585')
realization : str
realization from large ensemble (example: '1011.001' or '1301.020')
"""
def __init__(self, name=str(), sim_climate_scenario=str(), realization=None):
"""
Add variable name and specific properties associated with each gcm.
"""
if pygem_prms['rgi']['rgi_lon_colname'] not in ['CenLon_360']:
assert 1 == 0, (
'Longitude does not use 360 degrees. Check how negative values are handled!'
)
# Source of climate data
self.name = name
# If multiple realizations from each model+sim_climate_scenario are being used,
# then self.realization = realization.
# Otherwise, the realization attribute is not considered for single
# realization model+sim_climate_scenario simulations.
if realization is not None:
self.realization = realization
# Set parameters for CESM2 Large Ensemble
if self.name == 'smbb.f09_g17.LE2':
# Standardized CESM2 Large Ensemble format (GCM/SSP)
# Variable names
self.temp_vn = 'tas'
self.prec_vn = 'pr'
self.elev_vn = 'orog'
self.lat_vn = 'lat'
self.lon_vn = 'lon'
self.time_vn = 'time'
# Variable filenames
self.temp_fn = (
self.temp_vn
+ '_mon_'
+ sim_climate_scenario
+ '_'
+ name
+ '-'
+ realization
+ '.cam.h0.1980-2100.nc'
)
self.prec_fn = (
self.prec_vn
+ '_mon_'
+ sim_climate_scenario
+ '_'
+ name
+ '-'
+ realization
+ '.cam.h0.1980-2100.nc'
)
self.elev_fn = (
self.elev_vn
+ '_fx_'
+ sim_climate_scenario
+ '_'
+ name
+ '.cam.h0.nc'
)
# Variable filepaths
self.var_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cesm2_relpath']
+ sim_climate_scenario
+ pygem_prms['climate']['paths']['cesm2_fp_var_ending']
)
self.fx_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cesm2_relpath']
+ sim_climate_scenario
+ pygem_prms['climate']['paths']['cesm2_fp_fx_ending']
)
# Extra information
self.timestep = pygem_prms['time']['timestep']
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
self.sim_climate_scenario = sim_climate_scenario
# Set parameters for GFDL SPEAR Large Ensemble
elif self.name == 'GFDL-SPEAR-MED':
# Standardized GFDL SPEAR Large Ensemble format (GCM/SSP)
# Variable names
self.temp_vn = 'tas'
self.prec_vn = 'pr'
self.elev_vn = 'zsurf'
self.lat_vn = 'lat'
self.lon_vn = 'lon'
self.time_vn = 'time'
# Variable filenames
self.temp_fn = (
self.temp_vn
+ '_mon_'
+ sim_climate_scenario
+ '_'
+ name
+ '-'
+ realization
+ 'i1p1f1_gr3_1980-2100.nc'
)
self.prec_fn = (
self.prec_vn
+ '_mon_'
+ sim_climate_scenario
+ '_'
+ name
+ '-'
+ realization
+ 'i1p1f1_gr3_1980-2100.nc'
)
self.elev_fn = (
self.elev_vn + '_fx_' + sim_climate_scenario + '_' + name + '.nc'
)
# Variable filepaths
self.var_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['gfdl_relpath']
+ sim_climate_scenario
+ pygem_prms['climate']['paths']['gfdl_fp_var_ending']
)
self.fx_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['gfdl_relpath']
+ sim_climate_scenario
+ pygem_prms['climate']['paths']['gfdl_fp_fx_ending']
)
# Extra information
self.timestep = pygem_prms['time']['timestep']
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
self.sim_climate_scenario = sim_climate_scenario
else:
self.realization = []
# Set parameters for ERA5, ERA-Interim, and CMIP5 netcdf files
if self.name == 'ERA5':
# Variable names
self.temp_vn = 't2m'
self.tempstd_vn = 't2m_std'
self.prec_vn = 'tp'
self.elev_vn = 'z'
self.lat_vn = 'latitude'
self.lon_vn = 'longitude'
self.time_vn = 'time'
self.lr_vn = 'lapserate'
# Variable filenames
self.temp_fn = pygem_prms['climate']['paths']['era5_temp_fn']
self.tempstd_fn = pygem_prms['climate']['paths']['era5_tempstd_fn']
self.prec_fn = pygem_prms['climate']['paths']['era5_prec_fn']
self.elev_fn = pygem_prms['climate']['paths']['era5_elev_fn']
self.lr_fn = pygem_prms['climate']['paths']['era5_lr_fn']
# Variable filepaths
self.var_fp = (
pygem_prms['root'] + pygem_prms['climate']['paths']['era5_relpath']
)
self.fx_fp = (
pygem_prms['root'] + pygem_prms['climate']['paths']['era5_relpath']
)
# Extra information
self.timestep = pygem_prms['time']['timestep']
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
elif self.name == 'ERA-Interim':
# Variable names
self.temp_vn = 't2m'
self.prec_vn = 'tp'
self.elev_vn = 'z'
self.lat_vn = 'latitude'
self.lon_vn = 'longitude'
self.time_vn = 'time'
self.lr_vn = 'lapserate'
# Variable filenames
self.temp_fn = pygem_prms['climate']['paths']['eraint_temp_fn']
self.prec_fn = pygem_prms['climate']['paths']['eraint_prec_fn']
self.elev_fn = pygem_prms['climate']['paths']['eraint_elev_fn']
self.lr_fn = pygem_prms['climate']['paths']['eraint_lr_fn']
# Variable filepaths
self.var_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['eraint_relpath']
)
self.fx_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['eraint_relpath']
)
# Extra information
self.timestep = pygem_prms['time']['timestep']
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
# Standardized CMIP5 format (GCM/RCP)
elif 'rcp' in sim_climate_scenario:
# Variable names
self.temp_vn = 'tas'
self.prec_vn = 'pr'
self.elev_vn = 'orog'
self.lat_vn = 'lat'
self.lon_vn = 'lon'
self.time_vn = 'time'
# Variable filenames
self.temp_fn = (
self.temp_vn
+ '_mon_'
+ name
+ '_'
+ sim_climate_scenario
+ '_r1i1p1_native.nc'
)
self.prec_fn = (
self.prec_vn
+ '_mon_'
+ name
+ '_'
+ sim_climate_scenario
+ '_r1i1p1_native.nc'
)
self.elev_fn = (
self.elev_vn
+ '_fx_'
+ name
+ '_'
+ sim_climate_scenario
+ '_r0i0p0.nc'
)
# Variable filepaths
self.var_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cmip5_relpath']
+ sim_climate_scenario
+ pygem_prms['climate']['paths']['cmip5_fp_var_ending']
)
self.fx_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cmip5_relpath']
+ sim_climate_scenario
+ pygem_prms['climate']['paths']['cmip5_fp_fx_ending']
)
if not os.path.exists(self.var_fp) and os.path.exists(
pygem_prms['climate']['paths']['cmip5_relpath'] + name + '/'
):
self.var_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cmip5_relpath']
+ name
+ '/'
)
if not os.path.exists(self.fx_fp) and os.path.exists(
pygem_prms['climate']['paths']['cmip5_relpath'] + name + '/'
):
self.fx_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cmip5_relpath']
+ name
+ '/'
)
# Extra information
self.timestep = pygem_prms['time']['timestep']
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
self.sim_climate_scenario = sim_climate_scenario
# Standardized CMIP6 format (GCM/SSP)
elif 'ssp' in sim_climate_scenario:
# Variable names
self.temp_vn = 'tas'
self.prec_vn = 'pr'
self.elev_vn = 'orog'
self.lat_vn = 'lat'
self.lon_vn = 'lon'
self.time_vn = 'time'
# Variable filenames
self.temp_fn = (
name
+ '_'
+ sim_climate_scenario
+ '_r1i1p1f1_'
+ self.temp_vn
+ '.nc'
)
self.prec_fn = (
name
+ '_'
+ sim_climate_scenario
+ '_r1i1p1f1_'
+ self.prec_vn
+ '.nc'
)
self.elev_fn = name + '_' + self.elev_vn + '.nc'
# Variable filepaths
self.var_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cmip6_relpath']
+ name
+ '/'
)
self.fx_fp = (
pygem_prms['root']
+ pygem_prms['climate']['paths']['cmip6_relpath']
+ name
+ '/'
)
# Extra information
self.timestep = pygem_prms['time']['timestep']
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
self.sim_climate_scenario = sim_climate_scenario
[docs]
def importGCMfxnearestneighbor_xarray(self, filename, vn, main_glac_rgi):
"""
Import time invariant (constant) variables and extract nearest neighbor.
Note: cmip5 data used surface height, while ERA-Interim data is geopotential
Parameters
----------
filename : str
filename of variable
variablename : str
variable name
main_glac_rgi : pandas dataframe
dataframe containing relevant rgi glacier information
Returns
-------
glac_variable : numpy array
array of nearest neighbor values for all the glaciers in model run (rows=glaciers, column=variable)
"""
# Import netcdf file
data = xr.open_dataset(self.fx_fp + filename)
glac_variable = np.zeros(main_glac_rgi.shape[0])
# If time dimension included, then set the time index (required for ERA Interim, but not for CMIP5 or COAWST)
if 'time' in data[vn].coords:
time_idx = 0
# ERA Interim has only 1 value of time, so index is 0
# Find Nearest Neighbor
if self.name == 'COAWST':
for glac in range(main_glac_rgi.shape[0]):
latlon_dist = (
(
data[self.lat_vn].values
- main_glac_rgi[self.rgi_lat_colname].values[glac]
)
** 2
+ (
data[self.lon_vn].values
- main_glac_rgi[self.rgi_lon_colname].values[glac]
)
** 2
) ** 0.5
latlon_nearidx = [
x[0] for x in np.where(latlon_dist == latlon_dist.min())
]
lat_nearidx = latlon_nearidx[0]
lon_nearidx = latlon_nearidx[1]
glac_variable[glac] = data[vn][
latlon_nearidx[0], latlon_nearidx[1]
].values
else:
# argmin() finds the minimum distance between the glacier lat/lon and the GCM pixel
lat_nearidx = np.abs(
main_glac_rgi[self.rgi_lat_colname].values[:, np.newaxis]
- data.variables[self.lat_vn][:].values
).argmin(axis=1)
lon_nearidx = np.abs(
main_glac_rgi[self.rgi_lon_colname].values[:, np.newaxis]
- data.variables[self.lon_vn][:].values
).argmin(axis=1)
latlon_nearidx = list(zip(lat_nearidx, lon_nearidx))
latlon_nearidx_unique = list(set(latlon_nearidx))
glac_variable_dict = {}
for latlon in latlon_nearidx_unique:
try:
glac_variable_dict[latlon] = data[vn][
time_idx, latlon[0], latlon[1]
].values
except:
glac_variable_dict[latlon] = data[vn][latlon[0], latlon[1]].values
glac_variable = np.array([glac_variable_dict[x] for x in latlon_nearidx])
# Correct units if necessary (CMIP5 already in m a.s.l., ERA Interim is geopotential [m2 s-2])
if vn == self.elev_vn:
# If the variable has units associated with geopotential, then convert to m.a.s.l (ERA Interim)
if 'units' in data[vn].attrs and (data[vn].attrs['units'] == 'm**2 s**-2'):
# Convert m2 s-2 to m by dividing by gravity (ERA Interim states to use 9.80665)
glac_variable = glac_variable / 9.80665
# Elseif units already in m.a.s.l., then continue
elif 'units' in data[vn].attrs and data[vn].attrs['units'] == 'm':
pass
# Otherwise, provide warning
else:
print('Check units of elevation from GCM is m.')
return glac_variable
[docs]
def importGCMvarnearestneighbor_xarray(
self,
filename,
vn,
main_glac_rgi,
dates_table,
realizations=['r1i1p1f1', 'r4i1p1f1'],
):
"""
Import time series of variables and extract nearest neighbor.
Note: "NG" refers to a homogenized "new generation" of products from ETH-Zurich.
The function is setup to select netcdf data using the dimensions: time, latitude, longitude (in that
order). Prior to running the script, the user must check that this is the correct order of the dimensions
and the user should open the netcdf file to determine the names of each dimension as they may vary.
Parameters
----------
filename : str
filename of variable
vn : str
variable name
main_glac_rgi : pandas dataframe
dataframe containing relevant rgi glacier information
dates_table: pandas dataframe
dataframe containing dates of model run
Returns
-------
glac_variable_series : numpy array
array of nearest neighbor values for all the glaciers in model run (rows=glaciers, columns=time series)
time_series : numpy array
array of dates associated with the meteorological data (may differ slightly from those in table for monthly
timestep, i.e., be from the beginning/middle/end of month)
"""
# Import netcdf file
if not os.path.exists(self.var_fp + filename):
if os.path.exists(self.var_fp + filename.replace('r1i1p1f1', 'r4i1p1f1')):
filename = filename.replace('r1i1p1f1', 'r4i1p1f1')
if os.path.exists(self.var_fp + filename.replace('_native', '')):
filename = filename.replace('_native', '')
data = xr.open_dataset(self.var_fp + filename)
glac_variable_series = np.zeros((main_glac_rgi.shape[0], dates_table.shape[0]))
# Check GCM provides required years of data
years_check = pd.Series(data['time']).apply(lambda x: int(x.strftime('%Y')))
assert years_check.max() >= dates_table.year.max(), (
self.name + ' does not provide data out to ' + str(dates_table.year.max())
)
assert years_check.min() <= dates_table.year.min(), (
self.name + ' does not provide data back to ' + str(dates_table.year.min())
)
# Determine the correct time indices
if self.timestep == 'monthly':
start_idx = (
np.where(
pd.Series(data[self.time_vn]).apply(lambda x: x.strftime('%Y-%m'))
== dates_table['date'].apply(lambda x: x.strftime('%Y-%m'))[0]
)
)[0][0]
end_idx = (
np.where(
pd.Series(data[self.time_vn]).apply(lambda x: x.strftime('%Y-%m'))
== dates_table['date'].apply(lambda x: x.strftime('%Y-%m'))[
dates_table.shape[0] - 1
]
)
)[0][0]
# np.where finds the index position where to values are equal
# pd.Series(data.variables[gcm_time_varname]) creates a pandas series of the time variable associated with
# the netcdf
# .apply(lambda x: x.strftime('%Y-%m')) converts the timestamp to a string with YYYY-MM to enable the
# comparison
# > different climate dta can have different date formats, so this standardization for comparison is
# important
# ex. monthly data may provide date on 1st of month or middle of month, so YYYY-MM-DD would not work
# The same processing is done for the dates_table['date'] to facilitate the comparison
# [0] is used to access the first date
# dates_table.shape[0] - 1 is used to access the last date
# The final indexing [0][0] is used to access the value, which is inside of an array containing extraneous
# information
elif self.timestep == 'daily':
start_idx = (
np.where(
pd.Series(data[self.time_vn]).apply(
lambda x: x.strftime('%Y-%m-%d')
)
== dates_table['date'].apply(lambda x: x.strftime('%Y-%m-%d'))[0]
)
)[0][0]
end_idx = (
np.where(
pd.Series(data[self.time_vn]).apply(
lambda x: x.strftime('%Y-%m-%d')
)
== dates_table['date'].apply(lambda x: x.strftime('%Y-%m-%d'))[
dates_table.shape[0] - 1
]
)
)[0][0]
# Extract the time series
time_series = pd.Series(data[self.time_vn][start_idx : end_idx + 1])
# Find Nearest Neighbor
if self.name == 'COAWST':
for glac in range(main_glac_rgi.shape[0]):
latlon_dist = (
(
data[self.lat_vn].values
- main_glac_rgi[self.rgi_lat_colname].values[glac]
)
** 2
+ (
data[self.lon_vn].values
- main_glac_rgi[self.rgi_lon_colname].values[glac]
)
** 2
) ** 0.5
latlon_nearidx = [
x[0] for x in np.where(latlon_dist == latlon_dist.min())
]
lat_nearidx = latlon_nearidx[0]
lon_nearidx = latlon_nearidx[1]
glac_variable_series[glac, :] = data[vn][
start_idx : end_idx + 1, latlon_nearidx[0], latlon_nearidx[1]
].values
else:
# argmin() finds the minimum distance between the glacier lat/lon and the GCM pixel; .values is used to
# extract the position's value as opposed to having an array
lat_nearidx = np.abs(
main_glac_rgi[self.rgi_lat_colname].values[:, np.newaxis]
- data.variables[self.lat_vn][:].values
).argmin(axis=1)
lon_nearidx = np.abs(
main_glac_rgi[self.rgi_lon_colname].values[:, np.newaxis]
- data.variables[self.lon_vn][:].values
).argmin(axis=1)
# Find unique latitude/longitudes
latlon_nearidx = list(zip(lat_nearidx, lon_nearidx))
latlon_nearidx_unique = list(set(latlon_nearidx))
# Create dictionary of time series for each unique latitude/longitude
glac_variable_dict = {}
for latlon in latlon_nearidx_unique:
if 'expver' in data.keys():
expver_idx = 0
glac_variable_dict[latlon] = data[vn][
start_idx : end_idx + 1, expver_idx, latlon[0], latlon[1]
].values
else:
glac_variable_dict[latlon] = data[vn][
start_idx : end_idx + 1, latlon[0], latlon[1]
].values
# Convert to series
glac_variable_series = np.array(
[glac_variable_dict[x] for x in latlon_nearidx]
)
# Perform corrections to the data if necessary
# Surface air temperature corrections
if vn in ['tas', 't2m', 'T2']:
if 'units' in data[vn].attrs and data[vn].attrs['units'] == 'K':
# Convert from K to deg C
glac_variable_series = glac_variable_series - 273.15
else:
print('Check units of air temperature from GCM is degrees C.')
elif vn in ['t2m_std']:
if 'units' in data[vn].attrs and data[vn].attrs['units'] not in ['C', 'K']:
print(
'Check units of air temperature standard deviation from GCM is degrees C or K'
)
# Precipitation corrections
# If the variable is precipitation
elif vn in ['pr', 'tp', 'TOTPRECIP']:
# If the variable has units and those units are meters (ERA Interim)
if 'units' in data[vn].attrs and data[vn].attrs['units'] == 'm':
pass
# Elseif the variable has units and those units are kg m-2 s-1 (CMIP5/CMIP6)
elif 'units' in data[vn].attrs and data[vn].attrs['units'] == 'kg m-2 s-1':
# Convert from kg m-2 s-1 to m day-1
glac_variable_series = glac_variable_series / 1000 * 3600 * 24
# (1 kg m-2 s-1) * (1 m3/1000 kg) * (3600 s / hr) * (24 hr / day) = (m day-1)
# Elseif the variable has units and those units are mm (COAWST)
elif 'units' in data[vn].attrs and data[vn].attrs['units'] == 'mm':
glac_variable_series = glac_variable_series / 1000
# Else check the variables units
else:
print('Check units of precipitation from GCM is meters per day.')
if self.timestep == 'monthly' and self.name != 'COAWST':
# Convert from meters per day to meters per month (COAWST data already 'monthly accumulated precipitation')
if 'daysinmonth' in dates_table.columns:
glac_variable_series = (
glac_variable_series
* dates_table['daysinmonth'].values[np.newaxis, :]
)
elif vn != self.lr_vn:
print('Check units of air temperature or precipitation')
return glac_variable_series, time_series