Skip to content

Documentation for Granules

DataGranules is the class earthaccess uses to query CMR at the granule level.

Bases: GranuleQuery

A Granule oriented client for NASA CMR.

Api

https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html

Base class for Granule and Collection CMR queries.

Source code in earthaccess/search.py
def __init__(self, auth: Any = None, *args: Any, **kwargs: Any) -> None:
    """Initialize the granule query and the HTTP session it searches with.

    Parameters:
        auth: optional auth object; when it is given and reports
            `authenticated`, the session it provides (requested with
            `bearer_token=True`) replaces the default anonymous session
        *args: positional arguments forwarded to the parent initializer
        **kwargs: keyword arguments forwarded to the parent initializer
    """
    super().__init__(*args, **kwargs)
    self._debug = False

    # Begin with a plain session; it is swapped out below when the caller
    # supplied usable credentials.
    self.session = requests.session()
    has_valid_auth = auth is not None and auth.authenticated
    if has_valid_auth:
        # Searching needs the new bearer tokens from NASA Earthdata.
        self.session = auth.get_session(bearer_token=True)

bounding_box(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)

Filter by granules that overlap a bounding box. Must be used in combination with a collection filtering parameter such as short_name or entry_title.

Parameters:

Name Type Description Default
lower_left_lon str

lower left longitude of the box

required
lower_left_lat str

lower left latitude of the box

required
upper_right_lon str

upper right longitude of the box

required
upper_right_lat str

upper right latitude of the box

required
Source code in earthaccess/search.py
def bounding_box(
    self,
    lower_left_lon: str,
    lower_left_lat: str,
    upper_right_lon: str,
    upper_right_lat: str,
) -> Type[GranuleQuery]:
    """Restrict the query to granules that overlap a bounding box.

    A collection filtering parameter such as short_name or entry_title
    must be used together with this filter.

    Parameters:
        lower_left_lon: longitude of the box's lower left corner
        lower_left_lat: latitude of the box's lower left corner
        upper_right_lon: longitude of the box's upper right corner
        upper_right_lat: latitude of the box's upper right corner

    Returns:
        This query instance, so calls can be chained.
    """
    corners = (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
    # The parent class records the filter; we only hand back `self`.
    super().bounding_box(*corners)
    return self

cloud_cover(min_cover=0, max_cover=100)

Filter by the percentage of cloud cover present in the granule.

Parameters:

Name Type Description Default
min_cover int

minimum percentage of cloud cover

0
max_cover int

maximum percentage of cloud cover

100
Source code in earthaccess/search.py
def cloud_cover(
    self, min_cover: int = 0, max_cover: int = 100
) -> Type[GranuleQuery]:
    """Restrict the query by the percentage of cloud cover in the granule.

    Parameters:
        min_cover: lowest acceptable cloud cover percentage
        max_cover: highest acceptable cloud cover percentage

    Returns:
        This query instance, so calls can be chained.
    """
    cover_range = (min_cover, max_cover)
    # The parent class records the filter; we only hand back `self`.
    super().cloud_cover(*cover_range)
    return self

cloud_hosted(cloud_hosted=True)

Only match granules that are hosted in the cloud. This is valid for public collections and when using the short_name parameter. Concept-Id is unambiguous.

Tip

Cloud-hosted collections can be public or restricted. Restricted collections will not be matched using this parameter.

Parameters:

Name Type Description Default
cloud_hosted bool

True to only match granules that are hosted in the cloud

True
Source code in earthaccess/search.py
def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]:
    """Only match granules that are hosted in the cloud.

    This is valid for public collections and when using the short_name
    parameter. Concept-Id is unambiguous.

    ???+ Tip
        Cloud-hosted collections can be public or restricted.
        Restricted collections will not be matched using this parameter.

    Parameters:
        cloud_hosted: flag passed to the provider lookup for the collection
            named by the `short_name` parameter; must be a bool

    Returns:
        This query instance, so calls can be chained.

    Raises:
        TypeError: if `cloud_hosted` is not a bool.
    """
    if not isinstance(cloud_hosted, bool):
        raise TypeError("cloud_hosted must be of type bool")

    # Nothing to resolve unless a short_name filter was set beforehand.
    if "short_name" not in self.params:
        return self

    matched_provider = find_provider_by_shortname(
        self.params["short_name"], cloud_hosted
    )
    # Leave the parameters untouched when the lookup finds no provider.
    if matched_provider is not None:
        self.params["provider"] = matched_provider
    return self

daac(daac_short_name='')

Only match collections for a given DAAC. Defaults to the DAAC's on-prem collections.

Parameters:

Name Type Description Default
daac_short_name str

a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC

''
Source code in earthaccess/search.py
def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]:
    """Only match collections for a given DAAC.

    The provider is looked up as on-prem unless a `cloud_hosted` entry is
    already present in the query parameters.

    Parameters:
        daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC

    Returns:
        This query instance, so calls can be chained.
    """
    # A missing "cloud_hosted" parameter means on-prem (False).
    is_cloud = self.params.get("cloud_hosted", False)
    self.DAAC = daac_short_name
    self.params["provider"] = find_provider(daac_short_name, is_cloud)
    return self

data_center(data_center_name='')

An alias name for daac().

Parameters:

Name Type Description Default
data_center_name String

DAAC shortname, e.g. NSIDC, PODAAC, GESDISC

''
Source code in earthaccess/search.py
def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]:
    """Alias for `daac()`.

    Parameters:
        data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC

    Returns:
        Whatever `daac()` returns for the same name.
    """
    query = self.daac(data_center_name)
    return query

day_night_flag(day_night_flag)

Filter by period of the day the granule was collected during.

Parameters:

Name Type Description Default
day_night_flag str

"day", "night", or "unspecified"

required
Source code in earthaccess/search.py
def day_night_flag(self, day_night_flag: str) -> Type[GranuleQuery]:
    """Restrict the query by the period of the day the granule was collected.

    Parameters:
        day_night_flag: one of "day", "night", or "unspecified"

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().day_night_flag(day_night_flag)

    return self

debug(debug=True)

If True, prints the actual query sent to CMR. Note that the pagination happens in the headers.

Parameters:

Name Type Description Default
debug bool

Print CMR query.

True
Source code in earthaccess/search.py
def debug(self, debug: bool = True) -> Type[GranuleQuery]:
    """Enable or disable printing of the actual query sent to CMR.

    Note that the pagination happens in the headers.

    Parameters:
        debug: True to print the CMR query, False to stop printing it.

    Returns:
        This query instance, so calls can be chained.
    """
    # Honor the argument: the flag used to be forced to True, which made it
    # impossible to switch debugging off again with `debug(False)`.
    self._debug = debug
    return self

doi(doi)

Search data granules by DOI

Tip

Not all datasets have an associated DOI. Internally, if a DOI is found, earthaccess will grab the concept_id for the query to CMR.

Parameters:

Name Type Description Default
doi str

DOI of a dataset, e.g. 10.5067/AQR50-3Q7CS

required
Source code in earthaccess/search.py
def doi(self, doi: str) -> Type[GranuleQuery]:
    """Search data granules by the DOI of their dataset.

    ???+ Tip
        Not all datasets have an associated DOI. Internally, when the DOI
        resolves to a collection, earthaccess uses that collection's
        concept_id for the query to CMR.

    Parameters:
        doi: DOI of a dataset, e.g. 10.5067/AQR50-3Q7CS

    Returns:
        This query instance, so calls can be chained. When no collection
        matches the DOI a message is printed and the query is unchanged.
    """
    matches = DataCollections().doi(doi).get()

    if len(matches) == 0:
        print(
            f"earthaccess couldn't find any associated collections with the DOI: {doi}"
        )
        return self

    # Only the first matching collection is used.
    self.params["concept_id"] = matches[0].concept_id()
    return self

downloadable(downloadable=True)

Only match granules that are available for download. The opposite of this method is online_only().

Parameters:

Name Type Description Default
downloadable bool

True to require granules be downloadable

True
Source code in earthaccess/search.py
def downloadable(self, downloadable: bool = True) -> Type[GranuleQuery]:
    """Only match granules that are available for download.

    `online_only()` is the opposite of this method.

    Parameters:
        downloadable: True to require granules be downloadable

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().downloadable(downloadable)

    return self

get(limit=2000)

Get all the granules that match our current parameters up to some limit, even if spanning multiple pages.

Tip

The default page size is 2000. We need to be careful with the request size because all the JSON elements will be loaded into memory. This is more of an issue with granules than collections, as there can potentially be millions of them.

Parameters:

Name Type Description Default
limit int

The number of results to return

2000

Returns:

Type Description
List[DataGranule]

query results as a list of DataGranule instances.

Source code in earthaccess/search.py
def get(self, limit: int = 2000) -> List[DataGranule]:
    """Get all the granules that match our current parameters
    up to some limit, even if spanning multiple pages.

    ???+ Tip
        The default page size is 2000, we need to be careful with the request size because all the JSON
        elements will be loaded into memory. This is more of an issue with granules than collections as
        there can be potentially millions of them.

    Parameters:
        limit: The number of results to return

    Returns:
        query results as a list of `DataGranule` instances; an empty list
        when nothing matches the query.
    """
    response = get_results(self, limit)

    # Without this guard an empty result set crashed on `response[0]`.
    if not response:
        return []

    # The cloud-hosted flag is derived from the first granule only and then
    # applied to every granule in the result set.
    cloud = self._is_cloud_hosted(response[0])

    return [DataGranule(granule, cloud_hosted=cloud) for granule in response]

granule_name(granule_name)

Find granules matching either granule UR or producer granule id. Queries use the readable_granule_name metadata field.

Tip

We can use wildcards on a granule name to further refine our search, e.g. MODGRNLD.*.daily.*.

Parameters:

Name Type Description Default
granule_name str

granule name (accepts wildcards)

required
Source code in earthaccess/search.py
def granule_name(self, granule_name: str) -> Type[CollectionQuery]:
    """Find granules matching either granule UR or producer granule id.

    The query uses the readable_granule_name metadata field and enables
    pattern matching for it.

    ???+ Tip
        We can use wildcards on a granule name to further refine our search,
        e.g. `MODGRNLD.*.daily.*`.

    Parameters:
        granule_name: granule name (accepts wildcards)

    Returns:
        This query instance, so calls can be chained.

    Raises:
        TypeError: if `granule_name` is not a string.
    """
    if not isinstance(granule_name, str):
        raise TypeError("granule_name must be of type string")

    # The name and the option that turns on pattern matching are set together.
    self.params.update(
        {
            "readable_granule_name": granule_name,
            "options[readable_granule_name][pattern]": True,
        }
    )
    return self

hits()

Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers.

Returns:

Type Description
int

The number of results reported by CMR.

Source code in earthaccess/search.py
def hits(self) -> int:
    """Return the number of results the current query would produce.

    A lightweight request (page size 0) is sent to CMR and the count is
    read from the `CMR-Hits` response header.

    Returns:
        The number of results reported by CMR.

    Raises:
        RuntimeError: if CMR answers with an HTTP error status; the message
            is the response body when available, else the error text.
    """
    url = self._build_url()
    response = self.session.get(url, headers=self.headers, params={"page_size": 0})

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as ex:
        # Prefer the response body as the message when there is one.
        message = str(ex) if ex.response is None else ex.response.text
        raise RuntimeError(message) from ex

    return int(response.headers["CMR-Hits"])

instrument(instrument='')

Filter by the instrument associated with the granule.

Parameters:

Name Type Description Default
instrument str

name of the instrument

''
Source code in earthaccess/search.py
def instrument(self, instrument: str = "") -> Type[GranuleQuery]:
    """Restrict the query to granules associated with an instrument.

    Parameters:
        instrument: name of the instrument

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().instrument(instrument)

    return self

line(coordinates)

Filter by granules that overlap a series of connected points. Must be used in combination with a collection filtering parameter such as short_name or entry_title.

Parameters:

Name Type Description Default
coordinates List[Tuple[str, str]]

a list of (lon, lat) tuples

required
Source code in earthaccess/search.py
def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]:
    """Restrict the query to granules overlapping a series of connected points.

    A collection filtering parameter such as short_name or entry_title
    must be used together with this filter.

    Parameters:
        coordinates: a list of (lon, lat) tuples

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().line(coordinates)

    return self

online_only(online_only=True)

Only match granules that are listed online and not available for download. The opposite of this method is downloadable().

Parameters:

Name Type Description Default
online_only bool

True to require granules only be online

True
Source code in earthaccess/search.py
def online_only(self, online_only: bool = True) -> Type[GranuleQuery]:
    """Only match granules that are listed online and not available for download.

    `downloadable()` is the opposite of this method.

    Parameters:
        online_only: True to require granules only be online

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().online_only(online_only)

    return self

orbit_number(orbit1, orbit2)

Filter by the orbit number the granule was acquired during. Either a single orbit can be targeted or a range of orbits.

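Parameters:

orbit1: orbit to target (lower limit of range when orbit2 is provided)

orbit2: upper limit of range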
Source code in earthaccess/search.py
def orbit_number(
    self, orbit1: int, orbit2: Optional[int] = None
) -> Type[GranuleQuery]:
    """Filter by the orbit number the granule was acquired during. Either a single
    orbit can be targeted or a range of orbits.

    Parameters:
        orbit1: orbit to target (lower limit of range when orbit2 is provided)
        orbit2: upper limit of range; omit it to target the single orbit `orbit1`

    Returns:
        This query instance, so calls can be chained.
    """
    # orbit2 now defaults to None so the documented single-orbit form is
    # callable; python-cmr's GranuleQuery.orbit_number treats a missing
    # orbit2 as a single-orbit query.
    super().orbit_number(orbit1, orbit2)
    return self

parameters(**kwargs)

Provide query parameters as keyword arguments. The keyword needs to match the name of the method, and the value should either be the value or a tuple of values.

Example
query = DataGranules().parameters(short_name="AST_L1T",
                                  temporal=("2015-01","2015-02"),
                                  point=(42.5, -101.25))

Returns:

Type Description
Type[CollectionQuery]

Query instance

Source code in earthaccess/search.py
def parameters(self, **kwargs: Any) -> Type[CollectionQuery]:
    """Apply several query filters at once through keyword arguments.

    Each keyword must be the name of a method on this query. A tuple value
    is unpacked into positional arguments for that method; any other value
    is passed as the single argument.

    ???+ Example
        ```python
        query = DataGranules().parameters(short_name="AST_L1T",
                                          temporal=("2015-01","2015-02"),
                                          point=(42.5, -101.25))
        ```

    Returns:
        Query instance

    Raises:
        ValueError: if a keyword does not match any method of this query.
    """
    available = dict(getmembers(self, predicate=ismethod))

    for name, value in kwargs.items():
        handler = available.get(name)
        # Keys are checked one at a time, so filters listed before an
        # unknown key have already been applied when the error is raised.
        if handler is None:
            raise ValueError("Unknown key {}".format(name))

        call_args = value if isinstance(value, tuple) else (value,)
        handler(*call_args)

    return self

platform(platform='')

Filter by the satellite platform the granule came from.

Parameters:

Name Type Description Default
platform str

name of the satellite

''
Source code in earthaccess/search.py
def platform(self, platform: str = "") -> Type[GranuleQuery]:
    """Restrict the query to granules from a satellite platform.

    Parameters:
        platform: name of the satellite

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().platform(platform)

    return self

point(lon, lat)

Filter by granules that include a geographic point.

Parameters:

Name Type Description Default
lon String

longitude of geographic point

required
lat String

latitude of geographic point

required
Source code in earthaccess/search.py
def point(self, lon: str, lat: str) -> Type[GranuleQuery]:
    """Restrict the query to granules that include a geographic point.

    Parameters:
        lon (String): longitude of the geographic point
        lat (String): latitude of the geographic point

    Returns:
        This query instance, so calls can be chained.
    """
    location = (lon, lat)
    # The parent class records the filter; we only hand back `self`.
    super().point(*location)
    return self

polygon(coordinates)

Filter by granules that overlap a polygonal area. Must be used in combination with a collection filtering parameter such as short_name or entry_title.

Parameters:

Name Type Description Default
coordinates List[Tuple[str, str]]

list of (lon, lat) tuples

required
Source code in earthaccess/search.py
def polygon(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]:
    """Restrict the query to granules that overlap a polygonal area.

    A collection filtering parameter such as short_name or entry_title
    must be used together with this filter.

    Parameters:
        coordinates: list of (lon, lat) tuples

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().polygon(coordinates)

    return self

provider(provider='')

Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. For example, PODAAC is a data center or DAAC, PODAAC is the default provider for on-prem data, and POCLOUD is the PODAAC provider for their data in the cloud.

Parameters:

Name Type Description Default
provider str

a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc.

''
Source code in earthaccess/search.py
def provider(self, provider: str = "") -> Type[CollectionQuery]:
    """Only match collections from a given provider.

    A NASA datacenter or DAAC can have one or more providers. For example,
    PODAAC is a data center or DAAC; PODAAC is also its default provider
    for on-prem data, while POCLOUD is the PODAAC provider for their data
    in the cloud.

    Parameters:
        provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc.

    Returns:
        This query instance, so calls can be chained.
    """
    # Overwrites any provider set by an earlier call.
    self.params.update(provider=provider)
    return self

short_name(short_name='')

Filter by short name (aka product or collection name).

Parameters:

Name Type Description Default
short_name str

name of a collection

''

Returns:

Type Description
Type[GranuleQuery]

Query instance

Source code in earthaccess/search.py
def short_name(self, short_name: str = "") -> Type[GranuleQuery]:
    """Restrict the query by short name (aka product or collection name).

    Parameters:
        short_name: name of a collection

    Returns:
        Query instance
    """
    # The parent class records the filter; we only hand back `self`.
    super().short_name(short_name)

    return self

temporal(date_from=None, date_to=None, exclude_boundary=False)

Filter by an open or closed date range. Dates can be provided as datetime objects or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls to this method before calling get().

Parameters:

Name Type Description Default
date_from Optional[Union[str, datetime]]

earliest date of temporal range

None
date_to Optional[Union[str, datetime]]

latest date of temporal range

None
exclude_boundary bool

whether to exclude the date_from/to in the matched range

False
Source code in earthaccess/search.py
def temporal(
    self,
    date_from: Optional[Union[str, dt.datetime]] = None,
    date_to: Optional[Union[str, dt.datetime]] = None,
    exclude_boundary: bool = False,
) -> Type[GranuleQuery]:
    """Restrict the query to an open or closed date range.

    Dates may be datetime objects or date strings. Strings are parsed, with
    missing fields filled in from 1979-01-01, and converted to an ISO 8601
    string with a trailing "Z". A value that cannot be parsed is reported
    with a printed message and replaced by an empty string. Multiple ranges
    can be provided by successive calls to this method.

    Parameters:
        date_from: earliest date of temporal range
        date_to: latest date of temporal range
        exclude_boundary: whether to exclude the date_from/to in the matched range

    Returns:
        This query instance, so calls can be chained.
    """
    fallback = dt.datetime(1979, 1, 1)

    def _normalize(value: Any, failure_message: str) -> Any:
        # None and datetime objects go to the parent class untouched.
        if value is None or isinstance(value, dt.datetime):
            return value
        try:
            return parser.parse(value, default=fallback).isoformat() + "Z"
        except Exception:
            # Best effort: report the problem and fall back to an empty bound.
            print(failure_message)
            return ""

    date_from = _normalize(date_from, "The provided start date was not recognized")
    date_to = _normalize(date_to, "The provided end date was not recognized")

    super().temporal(date_from, date_to, exclude_boundary)
    return self

version(version='')

Filter by version. Note that CMR defines this as a string. For example, MODIS version 6 products must be searched for with "006".

Parameters:

Name Type Description Default
version str

version string

''
Source code in earthaccess/search.py
def version(self, version: str = "") -> Type[GranuleQuery]:
    """Restrict the query by version.

    CMR defines the version as a string. For example, MODIS version 6
    products must be searched for with "006".

    Parameters:
        version: version string

    Returns:
        This query instance, so calls can be chained.
    """
    # The parent class records the filter; we only hand back `self`.
    super().version(version)

    return self