__all__ = [
'get_proj4', 'customize_grid', 'def_grid_kw', 'shared_grid_kw',
'coverages_from_xml', 'legacy_get', 'get_file'
]
import requests
def_grid_kw = {
'12US1': dict(
GDNAM='12US1', GDTYP=2, NCOLS=459, NROWS=299,
XORIG=-2556000.0, YORIG=-1728000.0, XCELL=12000., YCELL=12000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'4US1': dict(
GDNAM='4US1', GDTYP=2, NCOLS=459 * 3, NROWS=299 * 3,
XORIG=-2556000.0, YORIG=-1728000.0, XCELL=4000., YCELL=4000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'1US1': dict(
GDNAM='1US1', GDTYP=2, NCOLS=459 * 12, NROWS=299 * 12,
XORIG=-2556000.0, YORIG=-1728000.0, XCELL=1000., YCELL=1000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'12US2': dict(
GDNAM='12US2', GDTYP=2, NCOLS=396, NROWS=246,
XORIG=-2412000.0, YORIG=-1620000.0, XCELL=12000., YCELL=12000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'4US2': dict(
GDNAM='4US2', GDTYP=2, NCOLS=396 * 3, NROWS=246 * 3,
XORIG=-2412000.0, YORIG=-1620000.0, XCELL=4000., YCELL=4000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'1US2': dict(
GDNAM='1US2', GDTYP=2, NCOLS=396 * 12, NROWS=246 * 12,
XORIG=-2412000.0, YORIG=-1620000.0, XCELL=1000., YCELL=1000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'36US3': dict(
GDNAM='36US3', GDTYP=2, NCOLS=172, NROWS=148,
XORIG=-2952000.0, YORIG=-2772000.0, XCELL=36000., YCELL=36000.,
P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
),
'108NHEMI2': dict(
GDNAM='108NHEMI2', GDTYP=6, NCOLS=187, NROWS=187,
XORIG=-10098000.0, YORIG=-10098000.0, XCELL=108000., YCELL=108000.,
P_ALP=1., P_BET=45., P_GAM=-98., XCENT=-98., YCENT=90.
),
'36NHEMI2': dict(
GDNAM='36NHEMI2', GDTYP=6, NCOLS=187 * 3, NROWS=187 * 3,
XORIG=-10098000.0, YORIG=-10098000.0, XCELL=36000., YCELL=36000.,
P_ALP=1., P_BET=45., P_GAM=-98., XCENT=-98., YCENT=90.
),
'NORTHSOUTHAM': dict(
GDNAM='NORTHSOUTHAM', GDTYP=7, NCOLS=179, NROWS=154,
XORIG=251759.25, YORIG=-1578187., XCELL=27000., YCELL=27000.,
P_ALP=0., P_BET=0., P_GAM=-98., XCENT=-98., YCENT=0.
),
'global_1pt0': dict(
GDNAM='GLOBAL', GDTYP=1, NCOLS=360, NROWS=180,
XORIG=-180, YORIG=-90, XCELL=1., YCELL=1.,
P_ALP=0., P_BET=0., P_GAM=0., XCENT=0., YCENT=0.
),
'global_0pt1': dict(
GDNAM='GLOBAL', GDTYP=1, NCOLS=3600, NROWS=1800,
XORIG=-180, YORIG=-90, XCELL=0.1, YCELL=0.1,
P_ALP=0., P_BET=0., P_GAM=0., XCENT=0., YCENT=0.
),
}
shared_grid_kw = dict(
VGTYP=7, VGTOP=5000., NLAYS=35, earth_radius=6370000., g=9.81, R=287.04,
A=50., T0=290, P0=1000e2, REGRID_AGGREGATE='None'
)
for key in def_grid_kw:
for pk, pv in shared_grid_kw.items():
def_grid_kw[key].setdefault(pk, pv)
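# Illustrative usage (a sketch, not part of the API): each named default
# grid merges the projection properties above with the shared vertical
# properties, so NLAYS etc. are available on every entry.
# >>> gkw = def_grid_kw['12US1']
# >>> gkw['NCOLS'], gkw['NROWS'], gkw['NLAYS']
# (459, 299, 35)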
def get_proj4(attrs, earth_radius=6370000.):
"""
Create a proj4 formatted grid definition using IOAPI attrs and earth_radius
Arguments
---------
attrs : dict-like
Mappable of IOAPI properties that supports the items method
earth_radius : float
Assumed radius of the earth. 6370000 is the WRF default.
Returns
-------
projstr : str
        proj4 formatted string such that the domain's southwest corner is at
        (0, 0) and its northeast corner is at (NCOLS, NROWS) in grid-cell
        units
"""
props = {k: v for k, v in attrs.items()}
props['x_0'] = -props['XORIG']
props['y_0'] = -props['YORIG']
props.setdefault('earth_radius', earth_radius)
if props['GDTYP'] == 1:
projstr = '+proj=lonlat +R={earth_radius}'.format(**props)
elif props['GDTYP'] == 2:
projstr = (
'+proj=lcc +lat_1={P_ALP} +lat_2={P_BET} +lat_0={YCENT}'
' +lon_0={XCENT} +R={earth_radius} +x_0={x_0} +y_0={y_0}'
' +to_meter={XCELL} +no_defs'
).format(**props)
elif props['GDTYP'] == 6:
projstr = (
'+proj=stere +lat_0={lat_0} +lat_ts={P_BET} +lon_0={XCENT}'
+ ' +x_0={x_0} +y_0={y_0} +R={earth_radius} +to_meter={XCELL}'
+ ' +no_defs'
).format(lat_0=props['P_ALP'] * 90, **props)
elif props['GDTYP'] == 7:
projstr = (
'+proj=merc +R={earth_radius} +lat_ts=0 +lon_0={XCENT}'
+ ' +x_0={x_0} +y_0={y_0} +to_meter={XCELL}'
+ ' +no_defs'
).format(**props)
else:
        raise ValueError('GDTYP {GDTYP} not implemented'.format(**props))
return projstr
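# Illustrative sketch: because of +to_meter={XCELL}, forward projection
# returns coordinates in grid-cell units from the southwest corner, so the
# 12US1 projection center (-97E, 40N) lands at (x_0 / XCELL, y_0 / XCELL).
# >>> import pyproj
# >>> proj = pyproj.Proj(get_proj4(def_grid_kw['12US1']))
# >>> proj(-97., 40.)
# (213.0, 144.0)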
def customize_grid(grid_kw, bbox, clip=True):
"""
Redefine grid_kw to cover bbox by removing extra rows and columns and
redefining XORIG, YORIG, NCOLS and NROWS.
Arguments
---------
grid_kw : dict or str
        If str, must be a key of def_grid_kw (e.g., '12US1').
If dict, must include all IOAPI grid metadata properties
bbox : tuple
wlon, slat, elon, nlat in decimal degrees (-180 to 180)
clip : bool
If True, limit grid to original grid bounds
Returns
-------
ogrid_kw : dict
IOAPI grid metadata properties with XORIG/YORIG and NCOLS/NROWS
adjusted such that it only covers bbox or (if clip) only covers
the portion of bbox covered by the original grid_kw.
"""
import pyproj
import numpy as np
if isinstance(grid_kw, str):
grid_kw = def_grid_kw[grid_kw]
ogrid_kw = {k: v for k, v in grid_kw.items()}
# Lonlat box must be treated separately
if ogrid_kw['GDTYP'] == 1:
llx, lly = bbox[:2]
urx, ury = bbox[2:]
ncols = int(np.ceil((urx - llx) / ogrid_kw['XCELL']) + 4)
nrows = int(np.ceil((ury - lly) / ogrid_kw['YCELL']) + 4)
xorig = (int(llx / ogrid_kw['XCELL']) - 1) * ogrid_kw['XCELL']
yorig = (int(lly / ogrid_kw['YCELL']) - 1) * ogrid_kw['YCELL']
ogrid_kw['NCOLS'] = ncols
ogrid_kw['NROWS'] = nrows
ogrid_kw['XORIG'] = xorig
ogrid_kw['YORIG'] = yorig
return ogrid_kw
proj4str = get_proj4(grid_kw)
proj = pyproj.Proj(proj4str)
llx, lly = proj(*bbox[:2])
urx, ury = proj(*bbox[2:])
midx, midy = proj((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
maxy = np.max([lly, ury, midy])
miny = np.min([lly, ury, midy])
maxx = np.max([llx, urx, midx])
minx = np.min([llx, urx, midx])
lli, llj = np.floor([minx, miny]).astype('i')
uri, urj = np.ceil([maxx, maxy]).astype('i')
if clip:
lli, llj = np.maximum(0, [lli, llj])
uri = np.minimum(grid_kw['NCOLS'], uri)
urj = np.minimum(grid_kw['NROWS'], urj)
ogrid_kw['XORIG'] = grid_kw['XORIG'] + lli * grid_kw['XCELL']
ogrid_kw['YORIG'] = grid_kw['YORIG'] + llj * grid_kw['YCELL']
ogrid_kw['NCOLS'] = uri - lli
ogrid_kw['NROWS'] = urj - llj
return ogrid_kw
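# Illustrative sketch: trim the default 12US1 grid to a box around Texas;
# the projection and cell size are preserved while XORIG/YORIG shift and
# NCOLS/NROWS shrink.
# >>> tx_kw = customize_grid('12US1', (-107., 25., -93., 37.))
# >>> tx_kw['XCELL'] == def_grid_kw['12US1']['XCELL']
# True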
def quickstats(df, refkey='obs'):
"""
    Simple utility to calculate quantiles, mean, correlation, bias
    statistics, and Index of Agreement. All statistics have the input data
    units except for nmb, fmb, correlation (r), and ioa, which are unitless.
Arguments
---------
df : pandas.DataFrame
Must contain at least refkey as a column. All other columns will be
treated as estimates.
refkey : str
Key to be used as the reference (or truth or observation) value.
Returns
-------
statdf : pandas.DataFrame
Contains results where each statistic is a row and columns are refkey
and all the estimates
    Notes
    -----
    For nmb or fmb in percent, multiply by 100.
"""
statsdf = df.describe()
statsdf.loc['r'] = df.corr().loc[refkey]
meands = statsdf.loc['mean']
    # Mean Bias [input units]
statsdf.loc['mb'] = meands - meands[refkey]
# Normalized Mean Bias [1]
statsdf.loc['nmb'] = statsdf.loc['mb'] / meands[refkey]
# Fractional Mean Bias [1]
statsdf.loc['fmb'] = 2 * statsdf.loc['mb'] / meands.sum()
# IOA [1]
bias = df.subtract(df[refkey], axis=0)
sqerr = bias**2
apdev = df.subtract(meands[refkey]).abs()
statsdf.loc['ioa'] = (
1 - sqerr.sum() / (apdev.add(apdev[refkey], axis=0)**2).sum()
)
return statsdf
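# Illustrative sketch with made-up values: each statistic is a row and each
# column (here 'obs' and 'mod') is scored against the refkey column.
# >>> import pandas as pd
# >>> df = pd.DataFrame({'obs': [1., 2., 3.], 'mod': [1.1, 1.9, 3.3]})
# >>> quickstats(df, refkey='obs').loc[['mb', 'nmb', 'ioa']]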
def parsexml(root):
"""Recursive xml parsing:
Given a root, return dictionaries for each element and its children.
Each element has children, attributes (attr), tag, and text.
If any of these has no elements, it will be removed.
"""
out = {}
out['tag'] = root.tag.split('}')[-1]
out['attr'] = root.attrib
out['text'] = root.text
out['children'] = []
for child in root:
childd = parsexml(child)
out['children'].append(childd)
if len(out['children']) == 0:
del out['children']
if out['text'] is None:
out['text'] = ''
out['text'] = out['text'].strip()
if len(out['text']) == 0:
del out['text']
if len(out['attr']) == 0:
del out['attr']
return out
def coverages_from_xml(txt):
    """Create a list of coverage record dicts from coverage XML text"""
import xml.etree.ElementTree as ET
root = ET.fromstring(txt)
xmlout = parsexml(root)
out = []
for c in xmlout['children']:
record = {k: v for k, v in c.items() if k != 'children'}
kids = c['children']
for e in kids:
if 'attr' not in e and len(e.get('children', [])) == 0:
record[e['tag']] = e.get('text', '')
if e['tag'] == 'lonLatEnvelope':
envtxt = ''
for p in e['children']:
envtxt += ' ' + p['text']
record['bbox_str'] = envtxt.strip()
if e['tag'] == 'domainSet':
for s in e['children']:
if s['tag'] == 'temporalDomain':
for tp in s['children']:
for te in tp['children']:
record[te['tag']] = te['text']
out.append(record)
return out
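# Illustrative sketch (placeholder URL): feed this function the text of a
# WCS DescribeCoverage response to get one flat record dict per coverage.
# >>> # txt = legacy_get('https://example.invalid/rsig?REQUEST=DescribeCoverage').text
# >>> # coverages = coverages_from_xml(txt)
# >>> # coverages[0].keys()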
def legacy_get(url, *args, **kwds):
    """
    Like requests.get, but mounts an HTTPS adapter that permits legacy TLS
    renegotiation, which some RSIG servers still require.
    """
    import copy
    session = requests.Session()
    la = LegacyAdapter()
    # Maple has a bad certificate, so if the URL is on maple, disable
    # certificate verification (but not the associated warning).
if 'maple' in url:
la.ssl_context.check_hostname = False
kwds = copy.copy(kwds)
kwds.setdefault('verify', False)
session.mount('https://', la)
return session.get(url, *args, **kwds)
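# Illustrative usage (placeholder URL): a drop-in replacement for
# requests.get against servers that still need legacy TLS renegotiation.
# >>> # r = legacy_get('https://example.invalid/rsig?SERVICE=wcs')
# >>> # r.raise_for_status()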
def _create_unverified_tls_context(*args, **kwds):
"""
    Thin wrapper around ssl._create_unverified_context that additionally
    allows legacy TLS renegotiation, which some RSIG servers currently
    require.
"""
import ssl
# Set up SSL context to allow legacy TLS versions
ctx = ssl._create_unverified_context(*args, **kwds)
ctx.options |= 0x4 # OP_LEGACY_SERVER_CONNECT
return ctx
class LegacyAdapter(requests.adapters.HTTPAdapter):
# "Transport adapter" that allows us to use custom ssl_context.
    def __init__(self, **kwargs):
        import ssl
        # Build a default client context, then allow legacy TLS
        # renegotiation (0x4 = OP_LEGACY_SERVER_CONNECT), which some
        # RSIG servers still require.
        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        ctx.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
        self.ssl_context = ctx
        super().__init__(**kwargs)
def init_poolmanager(self, connections, maxsize, block=False):
import urllib3
self.poolmanager = urllib3.poolmanager.PoolManager(
num_pools=connections, maxsize=maxsize,
block=block, ssl_context=self.ssl_context)
def _progress(blocknum, readsize, totalsize):
"""
Display progress using dots or % indicator.
Arguments
---------
    blocknum : int
        number of blocks transferred so far
    readsize : int
        size of each block in bytes
    totalsize : int
        total size of the file in bytes, or -1 if unknown
"""
totalblocks = (totalsize // readsize) + 1
pblocks = totalblocks // 10
if pblocks <= 0:
pblocks = 100
    if totalsize > 0:
        pct = min(blocknum * readsize / totalsize, 1) * 100
        print(
            '\r' + 'Retrieving {:.0f}%'.format(pct), end='', flush=True
        )
else:
if blocknum == 0:
print('Retrieving .', end='', flush=True)
if (blocknum % pblocks) == 0:
print('.', end='', flush=True)
def get_file(url, outpath, maxtries=5, verbose=1, overwrite=False):
"""
    Download a file from RSIG with retries and, when overwrite is False,
    reuse of previously downloaded (cached) files.
Arguments
---------
url : str
path to retrieve
outpath : str
path to save file to
maxtries : int
try this many times before quitting
verbose : int
Level of verbosity
overwrite : bool
If True, overwrite existing files.
If False, reuse existing files.
Returns
-------
None
"""
import time
from urllib.request import urlretrieve
import ssl
import os
# If the file exists, get the current size
if not overwrite and os.path.exists(outpath):
stat = os.stat(outpath)
dlsize = stat.st_size
else:
dlsize = 0
    # if the size is non-zero, assume the cached file is good
    if dlsize > 0:
        if verbose >= 0:
            print('Using cached:', outpath)
        return
_def_https_context = ssl._create_default_https_context
ssl._create_default_https_context = _create_unverified_tls_context
# Try to download the file maxtries times
tries = 0
if verbose > 0:
reporthook = _progress
else:
reporthook = None
while dlsize <= 0 and tries < maxtries:
# Remove 0-sized files.
outdir = os.path.dirname(outpath)
if os.path.exists(outpath):
os.remove(outpath)
os.makedirs(outdir, exist_ok=True)
if verbose > 0:
print('Calling RSIG', outpath, '')
t0 = time.time()
urlretrieve(
url=url,
filename=outpath,
reporthook=reporthook,
)
# Check timing
t1 = time.time()
stat = os.stat(outpath)
dlsize = stat.st_size
if dlsize == 0:
print('Failed', url, t1 - t0)
tries += 1
if verbose > 0:
print('')
ssl._create_default_https_context = _def_https_context
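# Illustrative usage (placeholder URL and path): the first call downloads
# with retries; the second reuses the on-disk copy because overwrite=False.
# >>> # get_file('https://example.invalid/data.nc', 'data.nc')
# >>> # get_file('https://example.invalid/data.nc', 'data.nc')  # cached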
def grid2poly(gdattrs):
    """
    Create one square polygon per grid cell, in grid-cell units.

    Arguments
    ---------
    gdattrs : dict
        Attributes of IOAPI grid

    Returns
    -------
    qdf : geopandas.GeoDataFrame
        One polygon per cell, indexed by cell-center (COL, ROW)
    """
import numpy as np
from shapely import polygons
import geopandas as gpd
import pandas as pd
    # Example input: gdattrs = def_grid_kw['12US1']
proj4 = get_proj4(gdattrs)
# Calculate x/y centers
y = np.arange(gdattrs['NROWS']) + 0.5
x = np.arange(gdattrs['NCOLS']) + 0.5
# Create a center for each cell
xy = np.stack(np.meshgrid(x, y), axis=2).reshape(-1, 2)
# Calculate offsets from center for the ll, lr, ur, ul
dll = np.array([-0.5, -0.5])
dlr = np.array([0.5, -0.5])
dur = np.array([0.5, 0.5])
dul = np.array([-0.5, 0.5])
# Create corners at points for polygons
crnr = np.stack([xy + dll, xy + dlr, xy + dur, xy + dul], axis=1)
# Calculate grid polygons
qgeom = polygons(crnr)
index = pd.MultiIndex.from_arrays(xy.T, names=['COL', 'ROW'])
qdf = gpd.GeoDataFrame({}, geometry=qgeom, crs=proj4, index=index)
return qdf
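# Illustrative sketch: one square polygon per cell of the coarse 36US3
# grid, indexed by fractional cell-center (COL, ROW) coordinates.
# >>> qdf = grid2poly(def_grid_kw['36US3'])
# >>> len(qdf) == 172 * 148
# True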
def poly2grid(gdf, gdattrs):
    """
    Calculate fractional area overlap between gdf polygons and grid cells.

    Arguments
    ---------
    gdf : geopandas.GeoDataFrame
        Polygons to intersect with the grid.
gdattrs : dict
Attributes of IOAPI grid
Returns
-------
oldf : geopandas.GeoDataFrame
Overlap of gdf with grid
"""
import geopandas as gpd
import warnings
qdf = grid2poly(gdattrs)
if gdf.crs is None:
warnings.warn('No CRS provided; assuming input is EPSG:4326')
        gdf = gdf.set_crs(4326)
gqdf = gdf.to_crs(qdf.crs)
oldf = gpd.overlay(qdf.reset_index(), gqdf)
oldf['area_overlap'] = oldf['geometry'].area
return oldf
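# Illustrative sketch (hypothetical shapefile): intersect polygons with the
# grid; area_overlap is in squared grid-cell units because to_meter=XCELL.
# >>> # import geopandas as gpd
# >>> # gdf = gpd.read_file('counties.shp')
# >>> # ol = poly2grid(gdf, def_grid_kw['12US2'])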
def poly2ioapi(gdf, gdattrs, how='mean'):
    """
    Aggregate polygon attributes onto an IOAPI-like grid.

    Arguments
    ---------
    gdf : geopandas.GeoDataFrame
        Data with attributes to grid.
    gdattrs : dict
        Attributes of IOAPI grid
    how : str
        Name of the groupby aggregation method (e.g., 'mean' or 'sum')

    Returns
    -------
    outds : xarray.Dataset
        IOAPI-like dataset with TSTEP, LAY, ROW, COL dimensions
    """
    import numpy as np
    import pandas as pd
ol = poly2grid(gdf, gdattrs)
df = getattr(ol.groupby(['ROW', 'COL']), how)(numeric_only=True)
ds = df.to_xarray()
outds = ds.interp(
COL=np.arange(gdattrs['NCOLS']) + 0.5,
ROW=np.arange(gdattrs['NROWS']) + 0.5
).expand_dims(
TSTEP=1, LAY=1
)
outds.coords['TSTEP'] = pd.to_datetime(['1970-01-01 00:00:00Z']).values
outds.coords['LAY'] = np.array([1])
outkeys = list(outds.data_vars)
for k in outkeys:
outds[k].attrs.update(
long_name=k.ljust(16), var_desc=k.ljust(80),
units='unknown'.ljust(16)
)
outds.attrs['NVARS'] = len(outkeys)
outds.attrs['VAR-LIST'] = ''.join([k.ljust(16) for k in outkeys])
outds.attrs.update(gdattrs)
return outds
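# Illustrative sketch (hypothetical input): grid polygon attributes and get
# an IOAPI-like dataset with conventional dimensions, ready to save.
# >>> # outds = poly2ioapi(gdf, def_grid_kw['12US2'], how='mean')
# >>> # outds.sizes  # TSTEP, LAY, ROW, COL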