Source code for pyrsig.utils

# Public API exported by `from pyrsig.utils import *`; helper functions
# (quickstats, parsexml, _progress, grid/poly converters) are intentionally
# not exported.
__all__ = [
    'get_proj4', 'customize_grid', 'def_grid_kw', 'shared_grid_kw',
    'coverages_from_xml', 'legacy_get', 'get_file'
]
import requests

# Default IOAPI grid definitions, keyed by common grid name.
# GDTYP codes: 1=longitude/latitude, 2=Lambert Conic Conformal,
# 6=Polar Stereographic, 7=Equatorial Mercator.
# XORIG/YORIG and XCELL/YCELL are in projection units (meters for projected
# grids, degrees for lon/lat grids).
def_grid_kw = {
    # CONUS "US1" Lambert domain at 12-km, 4-km, and 1-km resolution;
    # finer grids share the same origin and extent (NCOLS/NROWS scaled).
    '12US1': dict(
        GDNAM='12US1', GDTYP=2, NCOLS=459, NROWS=299,
        XORIG=-2556000.0, YORIG=-1728000.0, XCELL=12000., YCELL=12000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    '4US1': dict(
        GDNAM='4US1', GDTYP=2, NCOLS=459 * 3, NROWS=299 * 3,
        XORIG=-2556000.0, YORIG=-1728000.0, XCELL=4000., YCELL=4000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    '1US1': dict(
        GDNAM='1US1', GDTYP=2, NCOLS=459 * 12, NROWS=299 * 12,
        XORIG=-2556000.0, YORIG=-1728000.0, XCELL=1000., YCELL=1000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    # Smaller CONUS "US2" Lambert domain at 12-km, 4-km, and 1-km resolution
    '12US2': dict(
        GDNAM='12US2', GDTYP=2, NCOLS=396, NROWS=246,
        XORIG=-2412000.0, YORIG=-1620000.0, XCELL=12000., YCELL=12000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    '4US2': dict(
        GDNAM='4US2', GDTYP=2, NCOLS=396 * 3, NROWS=246 * 3,
        XORIG=-2412000.0, YORIG=-1620000.0, XCELL=4000., YCELL=4000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    '1US2': dict(
        GDNAM='1US2', GDTYP=2, NCOLS=396 * 12, NROWS=246 * 12,
        XORIG=-2412000.0, YORIG=-1620000.0, XCELL=1000., YCELL=1000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    # 36-km continental-scale Lambert domain
    '36US3': dict(
        GDNAM='36US3', GDTYP=2, NCOLS=172, NROWS=148,
        XORIG=-2952000.0, YORIG=-2772000.0, XCELL=36000., YCELL=36000.,
        P_ALP=33., P_BET=45., P_GAM=-97., XCENT=-97., YCENT=40.
    ),
    # Northern-hemisphere polar stereographic domains (108-km and 36-km)
    '108NHEMI2': dict(
        GDNAM='108NHEMI2', GDTYP=6, NCOLS=187, NROWS=187,
        XORIG=-10098000.0, YORIG=-10098000.0, XCELL=108000., YCELL=108000.,
        P_ALP=1., P_BET=45., P_GAM=-98., XCENT=-98., YCENT=90.
    ),
    '36NHEMI2': dict(
        GDNAM='36NHEMI2', GDTYP=6, NCOLS=187 * 3, NROWS=187 * 3,
        XORIG=-10098000.0, YORIG=-10098000.0, XCELL=36000., YCELL=36000.,
        P_ALP=1., P_BET=45., P_GAM=-98., XCENT=-98., YCENT=90.
    ),
    # North and South America on an equatorial Mercator projection
    'NORTHSOUTHAM': dict(
        GDNAM='NORTHSOUTHAM', GDTYP=7, NCOLS=179, NROWS=154,
        XORIG=251759.25, YORIG=-1578187., XCELL=27000., YCELL=27000.,
        P_ALP=0., P_BET=0., P_GAM=-98., XCENT=-98., YCENT=0.
    ),
    # Global lon/lat grids at 1.0 and 0.1 degree resolution
    'global_1pt0': dict(
        GDNAM='GLOBAL', GDTYP=1, NCOLS=360, NROWS=180,
        XORIG=-180, YORIG=-90, XCELL=1., YCELL=1.,
        P_ALP=0., P_BET=0., P_GAM=0., XCENT=0., YCENT=0.
    ),
    'global_0pt1': dict(
        GDNAM='GLOBAL', GDTYP=1, NCOLS=3600, NROWS=1800,
        XORIG=-180, YORIG=-90, XCELL=0.1, YCELL=0.1,
        P_ALP=0., P_BET=0., P_GAM=0., XCENT=0., YCENT=0.
    ),
}

# Vertical-structure and regridding properties common to all default grids
shared_grid_kw = dict(
    VGTYP=7, VGTOP=5000., NLAYS=35, earth_radius=6370000., g=9.81, R=287.04,
    A=50., T0=290, P0=1000e2, REGRID_AGGREGATE='None'
)

# Backfill each grid definition with the shared defaults; setdefault ensures
# grid-specific values are never overridden.
for _gdefn in def_grid_kw.values():
    for _sharedkey, _sharedval in shared_grid_kw.items():
        _gdefn.setdefault(_sharedkey, _sharedval)


def get_proj4(attrs, earth_radius=6370000.):
    """
    Create a proj4 formatted grid definition using IOAPI attrs and
    earth_radius

    Arguments
    ---------
    attrs : dict-like
        Mappable of IOAPI properties that supports the items method
    earth_radius : float
        Assumed radius of the earth. 6370000 is the WRF default.

    Returns
    -------
    projstr : str
        proj4 formatted string such that the domain southwest corner starts
        at (0, 0) and ends at (NCOLS, NROWS)
    """
    # Work on a copy and derive false-easting/northing from the grid origin
    props = dict(attrs.items())
    props['x_0'] = -props['XORIG']
    props['y_0'] = -props['YORIG']
    props.setdefault('earth_radius', earth_radius)
    gdtyp = props['GDTYP']
    if gdtyp == 1:
        # Lon/lat grid: no offsets or cell-unit scaling required
        return '+proj=lonlat +R={earth_radius}'.format(**props)
    if gdtyp == 2:
        # Lambert Conic Conformal; +to_meter makes 1 unit == 1 grid cell
        template = (
            '+proj=lcc +lat_1={P_ALP} +lat_2={P_BET} +lat_0={YCENT}'
            ' +lon_0={XCENT} +R={earth_radius} +x_0={x_0} +y_0={y_0}'
            ' +to_meter={XCELL} +no_defs'
        )
        return template.format(**props)
    if gdtyp == 6:
        # Polar stereographic; P_ALP is +1/-1 for north/south pole
        template = (
            '+proj=stere +lat_0={lat_0} +lat_ts={P_BET} +lon_0={XCENT}'
            ' +x_0={x_0} +y_0={y_0} +R={earth_radius} +to_meter={XCELL}'
            ' +no_defs'
        )
        return template.format(lat_0=props['P_ALP'] * 90, **props)
    if gdtyp == 7:
        # Equatorial Mercator
        template = (
            '+proj=merc +R={earth_radius} +lat_ts=0 +lon_0={XCENT}'
            ' +x_0={x_0} +y_0={y_0} +to_meter={XCELL}'
            ' +no_defs'
        )
        return template.format(**props)
    raise ValueError('GDTYPE {GDTYP} not implemented'.format(**props))
def customize_grid(grid_kw, bbox, clip=True):
    """
    Redefine grid_kw to cover bbox by removing extra rows and columns
    and redefining XORIG, YORIG, NCOLS and NROWS.

    Arguments
    ---------
    grid_kw : dict or str
        If str, must be a known grid in default grids. If dict, must include
        all IOAPI grid metadata properties
    bbox : tuple
        wlon, slat, elon, nlat in decimal degrees (-180 to 180)
    clip : bool
        If True, limit grid to original grid bounds

    Returns
    -------
    ogrid_kw : dict
        IOAPI grid metadata properties with XORIG/YORIG and NCOLS/NROWS
        adjusted such that it only covers bbox or (if clip) only covers the
        portion of bbox covered by the original grid_kw.
    """
    import pyproj
    import numpy as np

    if isinstance(grid_kw, str):
        grid_kw = def_grid_kw[grid_kw]
    outgrid = dict(grid_kw)
    # Lonlat box must be treated separately
    if outgrid['GDTYP'] == 1:
        wlon, slat = bbox[:2]
        elon, nlat = bbox[2:]
        # Pad the span by two cells on every side (+4 total, -1 on origin)
        outgrid['NCOLS'] = int(np.ceil((elon - wlon) / outgrid['XCELL']) + 4)
        outgrid['NROWS'] = int(np.ceil((nlat - slat) / outgrid['YCELL']) + 4)
        outgrid['XORIG'] = (
            (int(wlon / outgrid['XCELL']) - 1) * outgrid['XCELL']
        )
        outgrid['YORIG'] = (
            (int(slat / outgrid['YCELL']) - 1) * outgrid['YCELL']
        )
        return outgrid

    # Projected grids: get_proj4 yields a projection whose unit is one grid
    # cell with the origin at the grid's lower-left corner, so projected
    # bbox corners are directly fractional column/row indices.
    cellproj = pyproj.Proj(get_proj4(grid_kw))
    swx, swy = cellproj(*bbox[:2])
    nex, ney = cellproj(*bbox[2:])
    # Include the bbox midpoint in case projected edges bow in or out
    cx, cy = cellproj((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
    maxy = np.max([swy, ney, cy])
    miny = np.min([swy, ney, cy])
    maxx = np.max([swx, nex, cx])
    minx = np.min([swx, nex, cx])
    lli, llj = np.floor([minx, miny]).astype('i')
    uri, urj = np.ceil([maxx, maxy]).astype('i')
    if clip:
        # Constrain cell indices to the original grid's extent
        lli, llj = np.maximum(0, [lli, llj])
        uri = np.minimum(grid_kw['NCOLS'], uri)
        urj = np.minimum(grid_kw['NROWS'], urj)
    outgrid['XORIG'] = grid_kw['XORIG'] + lli * grid_kw['XCELL']
    outgrid['YORIG'] = grid_kw['YORIG'] + llj * grid_kw['YCELL']
    outgrid['NCOLS'] = uri - lli
    outgrid['NROWS'] = urj - llj
    return outgrid
def quickstats(df, refkey='obs'):
    """
    Simple utility to calculate quantiles, mean, correlation, bias
    statistics, and Index of Agreement. All statistics have the input
    data units except for nmb, fmb, correlation, and ioa -- all those are
    unitless.

    Arguments
    ---------
    df : pandas.DataFrame
        Must contain at least refkey as a column. All other columns will
        be treated as estimates.
    refkey : str
        Key to be used as the reference (or truth or observation) value.

    Returns
    -------
    statdf : pandas.DataFrame
        Contains results where each statistic is a row and columns are
        refkey and all the estimates

    Note: For nmb or fmb in percent, multiply by 100.
    """
    stats = df.describe()
    # Pearson correlation of every column against the reference column
    stats.loc['r'] = df.corr().loc[refkey]
    colmeans = stats.loc['mean']
    refmean = colmeans[refkey]
    # Mean Bias (input units); zero for the reference column itself
    stats.loc['mb'] = colmeans - refmean
    # Normalized Mean Bias [1]
    stats.loc['nmb'] = stats.loc['mb'] / refmean
    # Fractional Mean Bias [1]
    stats.loc['fmb'] = 2 * stats.loc['mb'] / colmeans.sum()
    # Index of Agreement [1]
    err = df.subtract(df[refkey], axis=0)
    absdev = df.subtract(refmean).abs()
    stats.loc['ioa'] = (
        1 - (err**2).sum() / (absdev.add(absdev[refkey], axis=0)**2).sum()
    )
    return stats


def parsexml(root):
    """Recursive xml parsing: Given a root, return dictionaries for each
    element and its children. Each element has children, attributes (attr),
    tag, and text. If any of these has no elements, it will be removed.
    """
    node = {
        'tag': root.tag.split('}')[-1],  # strip any namespace prefix
        'attr': root.attrib,
        'text': root.text,
        'children': [parsexml(child) for child in root],
    }
    # Drop empty pieces so leaf records stay compact
    if not node['children']:
        del node['children']
    node['text'] = (node['text'] or '').strip()
    if not node['text']:
        del node['text']
    if not node['attr']:
        del node['attr']
    return node
def coverages_from_xml(txt):
    """Based on xml text, create coverage data"""
    import xml.etree.ElementTree as ET

    tree = parsexml(ET.fromstring(txt))
    records = []
    for cov in tree['children']:
        # Seed the record with the coverage element's own tag/attr/text
        record = {key: val for key, val in cov.items() if key != 'children'}
        for elem in cov['children']:
            # Plain leaf elements become record fields keyed by tag
            if 'attr' not in elem and len(elem.get('children', [])) == 0:
                record[elem['tag']] = elem.get('text', '')
            if elem['tag'] == 'lonLatEnvelope':
                # Join the envelope corner positions into one bbox string
                record['bbox_str'] = ' '.join(
                    pos['text'] for pos in elem['children']
                ).strip()
            if elem['tag'] == 'domainSet':
                # Pull time elements out of domainSet/temporalDomain/*
                for dom in elem['children']:
                    if dom['tag'] == 'temporalDomain':
                        for period in dom['children']:
                            for telem in period['children']:
                                record[telem['tag']] = telem['text']
        records.append(record)
    return records
def legacy_get(url, *args, **kwds):
    """
    Like requests.get, but mounts a LegacyAdapter so the session tolerates
    the legacy TLS negotiation used by RSIG servers. Extra args and kwds
    are forwarded to session.get.
    """
    session = requests.session()
    adapter = LegacyAdapter()
    # Maple has a bad certificate, so here if you are using maple
    # I disable the certificate, but not the warning
    if 'maple' in url:
        adapter.ssl_context.check_hostname = False
        # shallow-copy so the caller's kwds dict is not modified
        kwds = dict(kwds)
        kwds.setdefault('verify', False)
    session.mount('https://', adapter)
    return session.get(url, *args, **kwds)
def _create_unverified_tls_context(*args, **kwds):
    """
    Thin wrapper around ssl._create_unverified_context that adds the
    option to use TLS negotiation, which is currently used by RSIG
    servers.
    """
    import ssl
    # Set up SSL context to allow legacy TLS versions
    ctx = ssl._create_unverified_context(*args, **kwds)
    ctx.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
    return ctx


class LegacyAdapter(requests.adapters.HTTPAdapter):
    # "Transport adapter" that allows us to use custom ssl_context.
    def __init__(self, **kwargs):
        import ssl
        # NOTE(review): saving/restoring _create_default_https_context here
        # appears to have no effect on create_default_context; retained
        # as-is to preserve original behavior -- confirm before removing.
        def_ctx = ssl._create_default_https_context
        ssl._create_default_https_context = _create_unverified_tls_context
        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        ctx.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
        self.ssl_context = ctx
        ssl._create_default_https_context = def_ctx
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False):
        # Build the pool manager with the legacy-tolerant ssl_context
        import urllib3
        self.poolmanager = urllib3.poolmanager.PoolManager(
            num_pools=connections, maxsize=maxsize, block=block,
            ssl_context=self.ssl_context)


def _progress(blocknum, readsize, totalsize):
    """
    Display progress using dots or % indicator.

    Arguments
    ---------
    blocknum : int
        block number of blocks to be read
    readsize : int
        chunksize read
    totalsize : int
        -1 unknown or size of file

    Returns
    -------
    None
    """
    totalblocks = (totalsize // readsize) + 1
    pblocks = totalblocks // 10
    if pblocks <= 0:
        pblocks = 100
    if totalsize > 0:
        # BUGFIX: report cumulative progress (bytes read so far over total)
        # instead of readsize / totalsize, which is the constant size of a
        # single chunk and never changes between calls.
        pct = min(blocknum * readsize, totalsize) / totalsize * 100
        print(
            '\r' + 'Retrieving {:.0f}'.format(pct),
            end='', flush=True
        )
    else:
        # Unknown total size: print a dot every pblocks chunks
        if blocknum == 0:
            print('Retrieving .', end='', flush=True)
        if (blocknum % pblocks) == 0:
            print('.', end='', flush=True)
def get_file(url, outpath, maxtries=5, verbose=1, overwrite=False):
    """
    Download file from RSIG using fault tolerance and optional caching
    when overwrite is False.

    Arguments
    ---------
    url : str
        path to retrieve
    outpath : str
        path to save file to
    maxtries : int
        try this many times before quitting
    verbose : int
        Level of verbosity
    overwrite : bool
        If True, overwrite existing files. If False, reuse existing files.

    Returns
    -------
    None
    """
    import time
    from urllib.request import urlretrieve
    import ssl
    import os

    # If the file exists, get the current size; non-zero size is assumed
    # to be a good (complete) prior download.
    if not overwrite and os.path.exists(outpath):
        dlsize = os.stat(outpath).st_size
    else:
        dlsize = 0
    if dlsize > 0:
        # Fixed: return on cache hit regardless of verbosity; previously the
        # early return was skipped when verbose < 0 (harmless, but the ssl
        # context was needlessly swapped).
        if verbose >= 0:
            print('Using cached:', outpath)
        return

    # Temporarily install the legacy-TLS context required by RSIG servers.
    # _create_unverified_tls_context is defined in this module, so the
    # previous self-import (from .utils import ...) was unnecessary.
    _def_https_context = ssl._create_default_https_context
    ssl._create_default_https_context = _create_unverified_tls_context
    reporthook = _progress if verbose > 0 else None
    tries = 0
    try:
        # Try to download the file maxtries times
        while dlsize <= 0 and tries < maxtries:
            # Remove 0-sized files and ensure the output folder exists.
            if os.path.exists(outpath):
                os.remove(outpath)
            outdir = os.path.dirname(outpath)
            if outdir:  # guard: os.makedirs('') raises when no dir part
                os.makedirs(outdir, exist_ok=True)
            if verbose > 0:
                print('Calling RSIG', outpath, '')
            t0 = time.time()
            urlretrieve(
                url=url,
                filename=outpath,
                reporthook=reporthook,
            )
            # Check timing and resulting size
            t1 = time.time()
            dlsize = os.stat(outpath).st_size
            if dlsize == 0:
                print('Failed', url, t1 - t0)
            tries += 1
        if verbose > 0:
            print('')
    finally:
        # Always restore the default https context, even if retrieval raised
        ssl._create_default_https_context = _def_https_context
def grid2poly(gdattrs):
    """
    Create one square polygon per grid cell of an IOAPI grid.

    Arguments
    ---------
    gdattrs : dict
        Attributes of IOAPI grid

    Returns
    -------
    qdf : geopandas.GeoDataFrame
        Cell polygons in the grid's cell-unit projection, indexed by a
        (COL, ROW) MultiIndex of cell-center coordinates.
    """
    import numpy as np
    from shapely import polygons
    import geopandas as gpd
    import pandas as pd

    # e.g., gdattrs = pyrsig.utils.def_grid_kw['12US1']
    proj4 = get_proj4(gdattrs)
    # Calculate x/y centers in cell units
    y = np.arange(gdattrs['NROWS']) + 0.5
    x = np.arange(gdattrs['NCOLS']) + 0.5
    # Create a center for each cell
    xy = np.stack(np.meshgrid(x, y), axis=2).reshape(-1, 2)
    # Calculate offsets from center for the ll, lr, ur, ul corners
    dll = np.array([-0.5, -0.5])
    dlr = np.array([0.5, -0.5])
    dur = np.array([0.5, 0.5])
    dul = np.array([-0.5, 0.5])
    # Create corners at points for polygons
    crnr = np.stack([xy + dll, xy + dlr, xy + dur, xy + dul], axis=1)
    # Calculate grid polygons
    qgeom = polygons(crnr)
    index = pd.MultiIndex.from_arrays(xy.T, names=['COL', 'ROW'])
    qdf = gpd.GeoDataFrame({}, geometry=qgeom, crs=proj4, index=index)
    return qdf


def poly2grid(gdf, gdattrs):
    """
    Intersect polygons with an IOAPI grid, adding overlap areas.

    Arguments
    ---------
    gdf : geopandas.GeoDataFrame
        Data to create fractional area overlap.
    gdattrs : dict
        Attributes of IOAPI grid

    Returns
    -------
    oldf : geopandas.GeoDataFrame
        Overlap of gdf with grid
    """
    import geopandas as gpd
    import warnings

    qdf = grid2poly(gdattrs)
    if gdf.crs is None:
        warnings.warn('No CRS provided; assuming input is EPSG:4326')
        # BUGFIX: previously assigned gdf.crs = 4326, mutating the caller's
        # GeoDataFrame; set_crs returns a new frame bound locally instead.
        gdf = gdf.set_crs(4326)
    gqdf = gdf.to_crs(qdf.crs)
    oldf = gpd.overlay(qdf.reset_index(), gqdf)
    # Area is in cell units, so it is the fractional cell coverage
    oldf['area_overlap'] = oldf['geometry'].area
    return oldf


def poly2ioapi(gdf, gdattrs, how='mean'):
    """
    Aggregate polygon data onto an IOAPI grid as an IOAPI-like dataset.

    Arguments
    ---------
    gdf : geopandas.GeoDataFrame
        Data to aggregate onto the grid.
    gdattrs : dict
        Attributes of IOAPI grid
    how : str
        Name of the groupby aggregation method (e.g., 'mean', 'sum').

    Returns
    -------
    outds : xarray.Dataset
        Gridded dataset with TSTEP/LAY/ROW/COL dims and IOAPI attributes.
    """
    import numpy as np
    import pandas as pd

    ol = poly2grid(gdf, gdattrs)
    # Aggregate overlapping pieces within each cell
    df = getattr(ol.groupby(['ROW', 'COL']), how)(numeric_only=True)
    ds = df.to_xarray()
    # Reindex onto the full grid (cells without data become NaN) and add
    # singleton TSTEP/LAY dims expected by IOAPI-like files
    outds = ds.interp(
        COL=np.arange(gdattrs['NCOLS']) + 0.5,
        ROW=np.arange(gdattrs['NROWS']) + 0.5
    ).expand_dims(
        TSTEP=1, LAY=1
    )
    outds.coords['TSTEP'] = pd.to_datetime(['1970-01-01 00:00:00Z']).values
    outds.coords['LAY'] = np.array([1])
    outkeys = list(outds.data_vars)
    for k in outkeys:
        # IOAPI requires fixed-width character attributes
        outds[k].attrs.update(
            long_name=k.ljust(16), var_desc=k.ljust(80),
            units='unknown'.ljust(16)
        )
    outds.attrs['NVARS'] = len(outkeys)
    outds.attrs['VAR-LIST'] = ''.join([k.ljust(16) for k in outkeys])
    outds.attrs.update(gdattrs)
    return outds