import numpy as np
import pandas as pd
from ..utils import import_optional_dependency
from ..utils.flopy_io import get_url_text
[docs]class acdd:
"""
Translate ScienceBase global metadata attributes to CF and ACDD
global attributes.
Parameters
----------
sciencebase_id : str
Unique identifier for ScienceBase record
(e.g. 582da7efe4b04d580bd37be8)
model : flopy model object
Model object
References
----------
https://www.sciencebase.gov/catalog/
https://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#description-of-file-contents
https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery
"""
def __init__(self, sciencebase_id, model):
    """
    Build CF/ACDD global attributes from a ScienceBase record and a model.

    Parameters
    ----------
    sciencebase_id : str
        Unique identifier for the ScienceBase record
        (e.g. 582da7efe4b04d580bd37be8)
    model : flopy model object
        Model whose ``modelgrid``, ``modeltime`` and ``model_ws``
        attributes are read.

    Notes
    -----
    If ``get_sciencebase_metadata`` returns None the constructor returns
    early; only ``id``, ``model``, ``model_grid``, ``model_time``,
    ``sciencebase_url`` and ``sb`` are set in that case.

    Raises
    ------
    KeyError
        If the ScienceBase record lacks one of the keys indexed directly
        (``summary``, ``contacts``, ``tags``, ``provenance``, ``title``).
    """
    self.id = sciencebase_id
    self.model = model
    self.model_grid = model.modelgrid
    self.model_time = model.modeltime
    self.sciencebase_url = (
        f"https://www.sciencebase.gov/catalog/item/{sciencebase_id}"
    )
    self.sb = self.get_sciencebase_metadata(sciencebase_id)
    if self.sb is None:
        return

    self.abstract = self.sb["summary"]
    contacts = self.sb["contacts"]
    self.authors = [
        c["name"] for c in contacts if "Originator" in c["type"]
    ]

    # keys that are the same in the ScienceBase json and ACDD, or
    # additional attributes to carry over. Note that "id" replaces the
    # sciencebase_id stored above with the value from the record.
    for key in ("title", "summary", "id", "citation"):
        setattr(self, key, self.sb.get(key))

    # highly recommended global attributes
    # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery
    self.keywords = [t["name"] for t in self.sb["tags"]]

    # recommended global attributes
    # ACDD attributes that are not populated here: history,
    # processing_level, comment, license, standard_name_vocabulary.
    self.naming_authority = "ScienceBase"  # org. that provides the id
    # ACDD: the method of production of the original data; if it was
    # model-generated, source should name the model and its version.
    # The model workspace is what gets stored.
    self.source = model.model_ws
    self.acknowledgement = self._get_xml_attribute("datacred")

    link_process = self.sb["provenance"]["linkProcess"]
    self.date_created = link_process.get("dateCreated")

    # ``creator`` is a property that rescans the contacts on every
    # access, so read it once.
    creator = self.creator
    self.creator_name = creator.get("name")
    self.creator_email = creator.get("email")
    self.creator_institution = creator["organization"].get("displayText")
    # also in CF convention for global attributes
    self.institution = self.creator_institution
    self.project = self.sb["title"]

    # First contact whose type mentions "publisher"; None when the
    # record has no such contact (or a contact has no type) instead of
    # failing the whole constructor.
    self.publisher_name = next(
        (
            c.get("name")
            for c in contacts
            if "publisher" in (c.get("type") or "").lower()
        ),
        None,
    )
    self.publisher_email = link_process.get("processedBy")
    self.publisher_url = "https://www2.usgs.gov/water/"

    self.geospatial_bounds_crs = "EPSG:4326"
    box = self.bounds
    self.geospatial_lat_min = box.get("minY")
    self.geospatial_lat_max = box.get("maxY")
    self.geospatial_lon_min = box.get("minX")
    self.geospatial_lon_max = box.get("maxX")
    self.geospatial_vertical_min = self.model_grid.botm.min()
    self.geospatial_vertical_max = self.model_grid.top.max()
    # assumed to always be up for GW models
    self.geospatial_vertical_positive = "up"

    # ``time_coverage`` is a property that rebuilds its dict on every
    # access, so read it once.
    coverage = self.time_coverage
    self.time_coverage_start = coverage.get("start")
    self.time_coverage_end = coverage.get("end")
    self.time_coverage_duration = coverage.get("duration")
    # ``time_coverage`` does not set a "resolution" key, so this is None.
    # (Original note here: "because the start/end date formats aren't
    # consistent between models".)
    self.time_coverage_resolution = coverage.get("resolution")
    self.metadata_link = self.sciencebase_url
def _get_xml_attribute(self, attr):
    """
    Return the text of the first xml element named ``attr``.

    Parameters
    ----------
    attr : str
        Tag name to search for anywhere below ``self.xmlroot``.

    Returns
    -------
    str or None
        Text of the first matching element in document order; None when
        ``self.xmlroot`` is None or no element has that tag.
    """
    root = self.xmlroot
    if root is None:
        return None
    # take the first match lazily rather than materializing every match
    element = next(root.iter(attr), None)
    if element is None:
        return None
    return element.text
@property
def bounds(self):
    """
    Bounding box of the ScienceBase record.

    Returns
    -------
    The value stored under ``spatial`` -> ``boundingBox`` in ``self.sb``.
    The constructor reads ``minX``, ``maxX``, ``minY`` and ``maxY`` from it.
    """
    spatial = self.sb["spatial"]
    return spatial["boundingBox"]
@property
def creator(self):
    """
    First ScienceBase contact whose type mentions "point of contact".

    Returns
    -------
    dict
        The matching entry of ``self.sb["contacts"]``; the type is
        compared case-insensitively.

    Raises
    ------
    IndexError
        If no contact matches.
    KeyError
        If any contact has no ``type`` key (every contact is inspected
        before the first match is returned).
    """

    def is_point_of_contact(contact):
        return "point of contact" in contact["type"].lower()

    matches = list(filter(is_point_of_contact, self.sb["contacts"]))
    return matches[0]
@property
def creator_url(self):
    """
    Guess the creator's USGS staff-profile URL from their name.

    The creator name has its periods removed, is split on whitespace,
    joined with hyphens and lower-cased to form the last path segment.

    Returns
    -------
    str
        The candidate URL when ``get_url_text`` returns something other
        than None for it, otherwise the string "unknown".
    """
    name = self.creator.get("name")
    slug = "-".join(name.replace(".", "").split()).lower()
    url = f"https://www.usgs.gov/staff-profiles/{slug}"
    # the fetch is only used as an existence check
    return "unknown" if get_url_text(url) is None else url
@property
def geospatial_bounds(self):
    """
    Describes the data's 2D geospatial extent in OGC's Well-Known
    Text (WKT) Geometry format.

    Returns
    -------
    str
        A closed ``POLYGON`` ring of "lon lat" pairs built from the
        ``geospatial_lon_min/max`` and ``geospatial_lat_min/max``
        attributes, ordered (lon_min, lat_min), (lon_min, lat_max),
        (lon_max, lat_max), (lon_max, lat_min) and back to the start.
    """
    west = self.geospatial_lon_min
    east = self.geospatial_lon_max
    south = self.geospatial_lat_min
    north = self.geospatial_lat_max
    corners = (
        (west, south),
        (west, north),
        (east, north),
        (east, south),
        (west, south),
    )
    ring = ", ".join(f"{lon} {lat}" for lon, lat in corners)
    return f"POLYGON (({ring}))"
@property
def geospatial_bounds_vertical_crs(self):
    """
    The vertical coordinate reference system (CRS) for the Z axis of
    the point coordinates in the geospatial_bounds attribute.

    Returns
    -------
    str or None
        "EPSG:5702" when ``vertical_datum`` is "NGVD29", "EPSG:5703"
        when it is "NAVD88", otherwise None.
    """
    datum = self.vertical_datum
    codes_by_datum = dict(NGVD29="EPSG:5702", NAVD88="EPSG:5703")
    return codes_by_datum.get(datum)
@property
def references(self):
    """
    Citation followed by the record's web links.

    Returns
    -------
    list
        ``self.citation`` first, then the ``uri`` of every entry in
        ``self.sb["webLinks"]`` whose ``type`` contains "link"
        (case-insensitive), in record order.
    """
    refs = [self.citation]
    for web_link in self.sb["webLinks"]:
        link_type = web_link.get("type").lower()
        if "link" in link_type:
            refs.append(web_link.get("uri"))
    return refs
@property
def time_coverage(self):
    """
    Temporal coverage of the dataset.

    Returns
    -------
    dict
        ``"start"`` and ``"end"`` hold the ``dateString`` of the first
        ScienceBase date whose type mentions that word. When the model
        has at least one non-steady-state period these are replaced by
        the model start date and the start date plus the summed period
        lengths, and ``"duration"`` ("<length> <time units>") is added.

    Raises
    ------
    IndexError
        If the ScienceBase record has no start or no end date.
    """
    sb_dates = self.sb["dates"]
    coverage = {}
    for label in ("start", "end"):
        coverage[label] = [
            d.get("dateString")
            for d in sb_dates
            if label in d["type"].lower()
        ][0]

    if not np.all(self.model_time.steady_state):
        # replace with times from model reference
        start = self.model_time.start_datetime
        total_length = self.model_time.perlen.sum()
        time_units = self.model_time.time_units
        coverage["start"] = start
        coverage["duration"] = f"{total_length} {time_units}"

        # Convert the length using the model's own time units so the end
        # date agrees with the reported duration. Units that are not
        # listed here keep the previous behavior of being read as days.
        # NOTE(review): "years" therefore still counts as days — confirm
        # the intended year length before adding a conversion.
        pandas_units = {
            "seconds": "s",
            "minutes": "min",
            "hours": "h",
            "days": "D",
        }
        unit = pandas_units.get(str(time_units).strip().lower(), "D")
        end = pd.Timestamp(start) + pd.Timedelta(total_length, unit=unit)
        coverage["end"] = str(end)
    return coverage
@property
def vertical_datum(self):
    """
    Try to parse the vertical datum from the xml info.

    Returns
    -------
    str or None
        "NAVD88" when the ``altdatum`` xml text contains "88",
        otherwise "NGVD29" when it contains "29"; None when the
        element is unavailable or matches neither.
    """
    altdatum = self._get_xml_attribute("altdatum")
    if altdatum is None:
        return None
    # "88" is checked before "29", so text containing both is NAVD88
    for marker, datum in (("88", "NAVD88"), ("29", "NGVD29")):
        if marker in altdatum:
            return datum
    return None
@property
def xmlroot(self):
    """
    ElementTree root element object for xml metadata.

    Returns
    -------
    The result of ``self.get_sciencebase_xml_metadata()``, or None when
    that call raises. The call is repeated on every access; nothing is
    cached here.
    """
    try:
        return self.get_sciencebase_xml_metadata()
    except Exception:
        # Best effort: the xml metadata is optional. Only ordinary errors
        # are suppressed, so KeyboardInterrupt and SystemExit still
        # propagate.
        return None
@property
def xmlfile(self):
    """
    ``key`` of the first entry in the ScienceBase ``identifiers`` list.

    Returns
    -------
    The value stored under ``key``, or None when that entry has no
    ``key``.
    """
    first_identifier = self.sb["identifiers"][0]
    return first_identifier.get("key")