Source code for pvlib.iotools.merra2

import pandas as pd
import requests
from io import StringIO


# Maps MERRA-2 variable names to pvlib column names. get_merra2 applies
# this mapping to the returned DataFrame's columns when ``map_variables``
# is true. Variables without an entry here keep their MERRA-2 name.
VARIABLE_MAP = {
    # listed under dataset M2T1NXRAD.5.12.4 in the get_merra2 docstring
    'SWGDN': 'ghi',
    'SWGDNCLR': 'ghi_clear',
    'ALBEDO': 'albedo',
    'LWGNT': 'longwave_net',
    'LWGEM': 'longwave_up',
    'LWGAB': 'longwave_down',
    # listed under dataset M2T1NXSLV.5.12.4
    'T2M': 'temp_air',
    'T2MDEW': 'temp_dew',
    'PS': 'pressure',
    # listed under dataset M2T1NXAER.5.12.4
    'TOTEXTTAU': 'aod550',
}


def _merra2_utc_naive(dt):
    """Parse ``dt`` and return it as a timezone-naive timestamp in UTC."""
    dt = pd.to_datetime(dt)
    if dt.tzinfo is not None:
        # convert to utc, then drop tz so that isoformat() is clean
        dt = dt.tz_convert("UTC").tz_localize(None)
    return dt


def _parse_merra2_csv(content, dataset, map_variables):
    """Turn the CSV text returned by the subset service into (data, meta)."""
    df = pd.read_csv(StringIO(content))
    df.index = pd.to_datetime(df['time'])

    lat_col = 'latitude[unit="degrees_north"]'
    lon_col = 'longitude[unit="degrees_east"]'
    meta = {
        'dataset': dataset,
        'station': df['station'].values[0],
        'latitude': df[lat_col].values[0],
        'longitude': df[lon_col].values[0],
    }

    # drop the non-data columns
    df = df.drop(columns=['time', 'station', lat_col, lon_col])

    # column names are like T2M[unit="K"] by default.  extract the unit
    # for the metadata, then rename col to just T2M
    units = {}
    rename = {}
    for col in df.columns:
        name, _, remainder = col.partition("[")
        quoted = remainder.split('"')
        # A column without a quoted unit is kept, it just gets no entry in
        # meta['units'] (rather than aborting the whole parse).
        if len(quoted) > 1:
            units[name] = quoted[1]
        rename[col] = name
    meta['units'] = units
    df = df.rename(columns=rename)

    if map_variables:
        df = df.rename(columns=VARIABLE_MAP)

    return df, meta


def get_merra2(latitude, longitude, start, end, username, password, dataset,
               variables, map_variables=True):
    """
    Retrieve MERRA-2 time-series irradiance and meteorological reanalysis data
    from NASA's GESDISC data archive.

    MERRA-2 [1]_ offers modeled data for many atmospheric quantities at hourly
    resolution on a 0.5° x 0.625° global grid.

    Access must be granted to the GESDISC data archive before EarthData
    credentials will work.  See [2]_ for instructions.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First timestamp of the requested period.  If a timezone is not
        specified, UTC is assumed.
    end : datetime like or str
        Last timestamp of the requested period.  If a timezone is not
        specified, UTC is assumed.  Must be in the same year as ``start``.
    username : str
        NASA EarthData username.
    password : str
        NASA EarthData password.
    dataset : str
        Dataset name (with version), e.g. "M2T1NXRAD.5.12.4".
    variables : list of str
        List of variable names to retrieve.  A single variable name given
        as a plain string is treated as a one-element list.  See the
        documentation of the specific dataset you are accessing for options.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable.  See variable :const:`VARIABLE_MAP`.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` are in different years, when converted to
        UTC, or if ``variables`` is empty.

    Returns
    -------
    data : pd.DataFrame
        Time series data.  The index corresponds to the middle of the
        interval.
    meta : dict
        Metadata.

    Notes
    -----
    The following datasets provide quantities useful for PV modeling:

    +------------------------------------+-----------+---------------+
    | Dataset                            | Variable  | pvlib name    |
    +====================================+===========+===============+
    | `M2T1NXRAD.5.12.4 <M2T1NXRAD_>`_   | SWGDN     | ghi           |
    |                                    +-----------+---------------+
    |                                    | SWGDNCLR  | ghi_clear     |
    |                                    +-----------+---------------+
    |                                    | ALBEDO    | albedo        |
    |                                    +-----------+---------------+
    |                                    | LWGAB     | longwave_down |
    |                                    +-----------+---------------+
    |                                    | LWGNT     | longwave_net  |
    |                                    +-----------+---------------+
    |                                    | LWGEM     | longwave_up   |
    +------------------------------------+-----------+---------------+
    | `M2T1NXSLV.5.12.4 <M2T1NXSLV_>`_   | T2M       | temp_air      |
    |                                    +-----------+---------------+
    |                                    | U10       | n/a           |
    |                                    +-----------+---------------+
    |                                    | V10       | n/a           |
    |                                    +-----------+---------------+
    |                                    | T2MDEW    | temp_dew      |
    |                                    +-----------+---------------+
    |                                    | PS        | pressure      |
    |                                    +-----------+---------------+
    |                                    | TO3       | n/a           |
    |                                    +-----------+---------------+
    |                                    | TQV       | n/a           |
    +------------------------------------+-----------+---------------+
    | `M2T1NXAER.5.12.4 <M2T1NXAER_>`_   | TOTEXTTAU | aod550        |
    |                                    +-----------+---------------+
    |                                    | TOTSCATAU | n/a           |
    |                                    +-----------+---------------+
    |                                    | TOTANGSTR | n/a           |
    +------------------------------------+-----------+---------------+

    .. _M2T1NXRAD: https://disc.gsfc.nasa.gov/datasets/M2T1NXRAD_5.12.4/summary
    .. _M2T1NXSLV: https://disc.gsfc.nasa.gov/datasets/M2T1NXSLV_5.12.4/summary
    .. _M2T1NXAER: https://disc.gsfc.nasa.gov/datasets/M2T1NXAER_5.12.4/summary

    A complete list of datasets and their documentation is available at [3]_.

    Note that MERRA2 does not currently provide DNI or DHI.

    References
    ----------
    .. [1] https://gmao.gsfc.nasa.gov/gmao-products/merra-2/
    .. [2] https://disc.gsfc.nasa.gov/earthdata-login
    .. [3] https://disc.gsfc.nasa.gov/datasets?project=MERRA-2
    """
    # general API info here:
    # https://docs.unidata.ucar.edu/tds/5.0/userguide/netcdf_subset_service_ref.html  # noqa: E501

    start = _merra2_utc_naive(start)
    end = _merra2_utc_naive(end)

    # The request URL addresses a single per-year aggregation, so one call
    # cannot span a year boundary.
    if (year := start.year) != end.year:
        raise ValueError("start and end must be in the same year (in UTC)")

    # ",".join() on a bare string would silently split it into characters
    # ("T2M" -> "T,2,M"), so normalize a single name to a list first.
    if isinstance(variables, str):
        variables = [variables]
    else:
        variables = list(variables)
    if not variables:
        raise ValueError("variables must contain at least one variable name")

    url = (
        "https://goldsmr4.gesdisc.eosdis.nasa.gov/thredds/ncss/grid/"
        f"MERRA2_aggregation/{dataset}/{dataset}_Aggregation_{year}.ncml"
    )

    parameters = {
        'var': ",".join(variables),
        'latitude': latitude,
        'longitude': longitude,
        'time_start': start.isoformat() + "Z",
        'time_end': end.isoformat() + "Z",
        'accept': 'csv',
    }

    auth = (username, password)

    with requests.Session() as session:
        session.auth = auth
        # Two-step fetch: the first request's final URL (after any
        # redirects) is requested again with the credentials attached.
        login = session.request('get', url, params=parameters)
        response = session.get(login.url, auth=auth, params=parameters)

    response.raise_for_status()

    content = response.content.decode('utf-8')
    return _parse_merra2_csv(content, dataset, map_variables)