Source code for flopy.export.metadata

import numpy as np
import pandas as pd

from ..utils import import_optional_dependency
from ..utils.flopy_io import get_url_text


[docs]class acdd: """ Translate ScienceBase global metadata attributes to CF and ACDD global attributes. Parameters ---------- sciencebase_id : str Unique identifier for ScienceBase record (e.g. 582da7efe4b04d580bd37be8) model : flopy model object Model object References ---------- https://www.sciencebase.gov/catalog/ https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#description-of-file-contents https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery """ def __init__(self, sciencebase_id, model): """ Class constructor """ self.id = sciencebase_id self.model = model self.model_grid = model.modelgrid self.model_time = model.modeltime self.sciencebase_url = ( f"https://www.sciencebase.gov/catalog/item/{sciencebase_id}" ) self.sb = self.get_sciencebase_metadata(sciencebase_id) if self.sb is None: return # stuff Jeremy mentioned self.abstract = self.sb["summary"] self.authors = [ c["name"] for c in self.sb["contacts"] if "Originator" in c["type"] ] # report image? # keys that are the same in sbjson and acdd; # or additional attributes to carry over for k in ["title", "summary", "id", "citation"]: self.__dict__[k] = self.sb.get(k, None) # highly recommended global attributes # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery self.keywords = [t["name"] for t in self.sb["tags"]] # recommended global attributes self.naming_authority = "ScienceBase" # org. that provides the id # self.history = None # This is a character array with a line for each invocation of a program that has modified the dataset. # Well-behaved generic netCDF applications should append a line containing: # date, time of day, user name, program name and command arguments. self.source = ( model.model_ws ) # The method of production of the original data. # If it was model-generated, source should name the model and its version. # self.processing_level = None # A textual description of the processing (or quality control) level of the data. # self.comment = None # Miscellaneous information about the data, not captured elsewhere. # This attribute is defined in the CF Conventions. self.acknowledgement = self._get_xml_attribute("datacred") # self.license = None # # self.standard_name_vocabulary = None self.date_created = self.sb["provenance"]["linkProcess"].get( "dateCreated" ) self.creator_name = self.creator.get("name") self.creator_email = self.creator.get("email") # self.creator_url = self.sb['webLinks'][0].get('uri') self.creator_institution = self.creator["organization"].get( "displayText" ) self.institution = ( self.creator_institution ) # also in CF convention for global attributes self.project = self.sb["title"] self.publisher_name = [ d.get("name") for d in self.sb["contacts"] if "publisher" in d.get("type").lower() ][0] self.publisher_email = self.sb["provenance"]["linkProcess"].get( "processedBy" ) self.publisher_url = "https://www2.usgs.gov/water/" # self.sb['provenance']['linkProcess'].get('linkReference') self.geospatial_bounds_crs = "EPSG:4326" self.geospatial_lat_min = self.bounds.get("minY") self.geospatial_lat_max = self.bounds.get("maxY") self.geospatial_lon_min = self.bounds.get("minX") self.geospatial_lon_max = self.bounds.get("maxX") self.geospatial_vertical_min = self.model_grid.botm.min() self.geospatial_vertical_max = self.model_grid.top.max() self.geospatial_vertical_positive = ( "up" # assumed to always be up for GW models ) self.time_coverage_start = self.time_coverage.get("start") self.time_coverage_end = self.time_coverage.get("end") self.time_coverage_duration = self.time_coverage.get("duration") # because the start/end date formats aren't consistent between models self.time_coverage_resolution = self.time_coverage.get("resolution") self.metadata_link = self.sciencebase_url def _get_xml_attribute(self, attr): try: return list(self.xmlroot.iter(attr))[0].text except: return None @property def bounds(self): return self.sb["spatial"]["boundingBox"] @property def creator(self): return [ d for d in self.sb["contacts"] if "point of contact" in d["type"].lower() ][0] @property def creator_url(self): urlname = "-".join(self.creator.get("name").replace(".", "").split()) url = f"https://www.usgs.gov/staff-profiles/{urlname.lower()}" # check if it exists txt = get_url_text(url) if txt is not None: return url else: return "unknown" @property def geospatial_bounds(self): """ Describes the data's 2D or 3D geospatial extent in OGC's Well-Known Text (WKT) Geometry format """ fmt = "(({0} {2}, {0} {3}, {1} {3}, {1} {2}, {0} {2}))" bounds = "POLYGON " + fmt.format( self.geospatial_lon_min, self.geospatial_lon_max, self.geospatial_lat_min, self.geospatial_lat_max, ) return bounds @property def geospatial_bounds_vertical_crs(self): """ The vertical coordinate reference system (CRS) for the Z axis of the point coordinates in the geospatial_bounds attribute. """ epsg = {"NGVD29": "EPSG:5702", "NAVD88": "EPSG:5703"} return epsg.get(self.vertical_datum) @property def references(self): """ Returns ------- """ r = [self.citation] links = [ d.get("uri") for d in self.sb["webLinks"] if "link" in d.get("type").lower() ] return r + links @property def time_coverage(self): """ Returns ------- """ l = self.sb["dates"] tc = {} for t in ["start", "end"]: tc[t] = [d.get("dateString") for d in l if t in d["type"].lower()][ 0 ] if not np.all(self.model_time.steady_state) and pd is not None: # replace with times from model reference tc["start"] = self.model_time.start_datetime strt = pd.Timestamp(self.model_time.start_datetime) mlen = self.model_time.perlen.sum() tunits = self.model_time.time_units tc["duration"] = f"{mlen} {tunits}" end = strt + pd.Timedelta(mlen, unit="d") tc["end"] = str(end) return tc @property def vertical_datum(self): """ Try to parse the vertical datum from the xml info """ altdatum = self._get_xml_attribute("altdatum") if altdatum is not None: if "88" in altdatum: return "NAVD88" elif "29" in altdatum: return "NGVD29" else: return None @property def xmlroot(self): """ ElementTree root element object for xml metadata """ try: return self.get_sciencebase_xml_metadata() except: return None @property def xmlfile(self): return self.sb["identifiers"][0].get("key")
[docs] def get_sciencebase_metadata(self, id): """ Gets metadata json text for given ID from sciencebase.gov; loads into python dictionary. Fetches the reference text using the url: https://www.sciencebase.gov/catalog/item/<ID>?format=json Parameters ---------- ID : str ScienceBase ID string; e.g. 582da7efe4b04d580bd37be8 for Dane County Model Returns ------- metadata : dict Dictionary of metadata """ urlbase = "https://www.sciencebase.gov/catalog/item/{}?format=json" url = urlbase.format(id) import json msg = "Need an internet connection to get metadata from ScienceBase." text = get_url_text(url, error_msg=msg) if text is not None: return json.loads(text)
[docs] def get_sciencebase_xml_metadata(self): """ Gets xml from sciencebase.gov, using XML url obtained from json using get_sciencebase_metadata(). Parameters ---------- ID : str ScienceBase ID string; e.g. 582da7efe4b04d580bd37be8 for Dane County Model Returns ------- metadata : dict Dictionary of metadata """ # use defusedxml to removed XML security vulnerabilities ET = import_optional_dependency("defusedxml.ElementTree") url = self.xmlfile msg = "Need an internet connection to get metadata from ScienceBase." text = get_url_text(url, error_msg=msg) return ET.fromstring(text)