Calculate cloud field properties from ASTER images¶

In [1]:

%pylab inline
%matplotlib inline

import multiprocessing
import warnings
warnings.simplefilter("ignore")

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from skimage import measure

import typhon as ty

from utils import *

Populating the interactive namespace from numpy and matplotlib

In [2]:

plt.style.use(ty.plots.styles.get('typhon'))

Get and open files¶

List of ASTER images on Feb 5

In [3]:

files0205 = ["TERRA_ASTER_processed_20200205_142515.nc",
             "TERRA_ASTER_processed_20200205_142524.nc",
             "TERRA_ASTER_processed_20200205_142533.nc",
             "TERRA_ASTER_processed_20200205_142542.nc",
             "TERRA_ASTER_processed_20200205_142551.nc",
             "TERRA_ASTER_processed_20200205_142600.nc",
             "TERRA_ASTER_processed_20200205_142608.nc",
             "TERRA_ASTER_processed_20200205_142617.nc",
             "TERRA_ASTER_processed_20200205_142626.nc",
             "TERRA_ASTER_processed_20200205_142635.nc",
             "TERRA_ASTER_processed_20200205_142644.nc",
             "TERRA_ASTER_processed_20200205_142653.nc",
             "TERRA_ASTER_processed_20200205_142702.nc",
             "TERRA_ASTER_processed_20200205_142710.nc",
             "TERRA_ASTER_processed_20200205_142719.nc",
             "TERRA_ASTER_processed_20200205_142728.nc",
             "TERRA_ASTER_processed_20200205_142737.nc",
             ]

Cloud sizes from single ASTER image¶

In [4]:

url = f"https://observations.ipsl.fr/thredds/dodsC/EUREC4A/SATELLITES/TERRA/ASTER/nc/{files0205[5]}"

store = xr.backends.PydapDataStore.open(url)
ds = xr.open_dataset(store)

extract cloud mask¶

In [5]:

ds.cloudmask

Out[5]:

xarray.DataArray

'cloudmask'

pixel_alongtrack_VNIR: 4200
pixel_acrosstrack_VNIR: 4980

...
```
[20916000 values with dtype=float32]
```
Coordinates: (0)
Attributes: (7)
description :
ASTER cloud mask according to Werner et al., 2016.
flag_meanings :
confidently_clear probably _clear probably_cloudy confidently_cloudy
flag_values :
[2, 3, 4, 5]
valid_range :
[2.0, 5.0]
long_name :
ASTER cloud mask
units :
unitless
_ChunkSizes :
[840, 996]

Convert the 4 flags to a binary cloud mask¶

In [6]:

cloudmask = np.zeros_like(ds.cloudmask)

## only trust "confidently cloudy" labels
#clmask[ds.cloudmask==5] = 1

## cloudy =  "confidently cloudy" and "probably cloudy"
cloudmask[np.logical_or(ds.cloudmask==4, ds.cloudmask==5)] = 1

In [7]:

# quicklook
plt.imshow(cloudmask, cmap="Greys_r")

Out[7]:

<matplotlib.image.AxesImage at 0x2b771367ca10>

filter cloud mask¶

single pixel "clouds" rather reflect instrument noise and shall be excluded from an statistical analysis

In [8]:

# filtering takes quite a long time
cloudmask_filtered = filter_cloudmask(cloudmask, threshold=4, connectivity=2)
print(f"cloud fraction of clouds > 4 pixel: {cloudfraction(cloudmask_filtered)}")

cloud fraction of clouds > 4 pixel: 0.08830655957161981

In [9]:

# cloud labeling based on 8-connectivity
labels = measure.label(cloudmask_filtered, connectivity=2)
props = measure.regionprops(labels)
# extract cloud equivalent diameters and convert the unit to km.
eqd = np.array([p.equivalent_diameter for p in props]) * 15 / 1000
print(f"There are {len(eqd)} clouds detected")

There are 4724 clouds detected

Cloud top height¶

In [18]:

fig, (ax1, ax2) = plt.subplots(1,2, figsize=(20, 7))
ds.cloudtopheight.plot(ax=ax1)
ds.cloudmask.plot(ax=ax2)

Out[18]:

<matplotlib.collections.QuadMesh at 0x2b772c4248d0>

In [34]:

# artificially replicate CTH estimates as they are derived from 90m resolution TIR bands
cth = np.repeat(np.repeat(ds.cloudtopheight.data, 6, axis=0), 6, axis=1)
np.shape(cth)

Out[34]:

(4200, 4980)

In [36]:

cth_means = [np.nanmean(cth[labels==l]) for l in range(1, labels.max() + 1)]

Cluster index Iorg¶

In [43]:

iorg = iorg(cloudmask=cloudmask_filtered, connectivity=2)

Cloud field properties on Feb 5¶

pre-calculate cloud field properties for the images recorded on February 5 and save them to netCDF files.

In [4]:

def get_cloudstatistics(asterfile):
    print(f"processing file {asterfile}...")
    url = f"https://observations.ipsl.fr/thredds/dodsC/EUREC4A/SATELLITES/TERRA/ASTER/nc/{asterfile}"
    store = xr.backends.PydapDataStore.open(url)
    ds = xr.open_dataset(store)
    cloudmask = np.zeros_like(ds.cloudmask)
    cloudmask[ds.cloudmask==5] = 1
    #cloudmask[np.logical_or(ds.cloudmask==4, ds.cloudmask==5)] = 1
    cloudmask_filtered = filter_cloudmask(cloudmask, threshold=4, connectivity=2)
    # cloud fraction
    fraction = cloudfraction(cloudmask_filtered)
    # cloud sizes
    labels = measure.label(cloudmask_filtered, connectivity=2)
    props = measure.regionprops(labels)
    # extract cloud equivalent diameters and convert the unit to km.
    eqd = np.array([p.equivalent_diameter for p in props]) * 15 / 1000
    # average cloud top height per cloud
    cth = np.repeat(np.repeat(ds.cloudtopheight.data, 6, axis=0), 6, axis=1)
    cth_means = [np.nanmean(cth[labels==l]) for l in range(1, labels.max() + 1)]
    # cluster index iorg
    clustering = iorg(cloudmask=cloudmask_filtered, connectivity=2)
    return fraction, eqd, cth_means, clustering

In [28]:

%%time
with multiprocessing.Pool(4) as p:
    stats = p.map(get_cloudstatistics, files0205)

processing file TERRA_ASTER_processed_20200205_142515.nc...
processing file TERRA_ASTER_processed_20200205_142551.nc...processing file TERRA_ASTER_processed_20200205_142533.nc...

processing file TERRA_ASTER_processed_20200205_142608.nc...
processing file TERRA_ASTER_processed_20200205_142524.nc...
processing file TERRA_ASTER_processed_20200205_142542.nc...
processing file TERRA_ASTER_processed_20200205_142600.nc...
processing file TERRA_ASTER_processed_20200205_142617.nc...
processing file TERRA_ASTER_processed_20200205_142626.nc...
processing file TERRA_ASTER_processed_20200205_142644.nc...
processing file TERRA_ASTER_processed_20200205_142702.nc...
processing file TERRA_ASTER_processed_20200205_142710.nc...
processing file TERRA_ASTER_processed_20200205_142653.nc...
processing file TERRA_ASTER_processed_20200205_142719.nc...
processing file TERRA_ASTER_processed_20200205_142737.nc...
processing file TERRA_ASTER_processed_20200205_142635.nc...
processing file TERRA_ASTER_processed_20200205_142728.nc...
CPU times: user 1.27 s, sys: 486 ms, total: 1.76 s
Wall time: 29min 51s

In [30]:

cf, sizes, cth, clustering = zip(*stats)

Write cloud statistics to netCDF per image¶

In [32]:

for i, file in enumerate(files0205):
    ds = xr.Dataset(
        coords={"cloud_id": np.arange(sizes[i].size)},
        data_vars={
            "cloudfraction": cf[i],
            "cloudsizes": (("cloud_id",), sizes[i]),
            "cloudtopheight": (("cloud_id",), cth[i]),
            "Iorg": clustering[i],
        })
    ds.attrs = {"datetime": file[22:-3]}
    ds.to_netcdf(f"cloudstatistics_{file}")

Test opening one file¶

In [97]:

test = xr.open_dataset(f"cloudstatistics_{files0205[5]}")
test