Calculate cloud field properties from ASTER cloud masks on February 5

Posted on Do 06 August 2020 in cloud field properties • by Theresa Mieslinger

open in binder
view on github
download notebook

Calculate cloud field properties from ASTER images

In [1]:
%pylab inline
%matplotlib inline

import multiprocessing
import warnings
warnings.simplefilter("ignore")

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from skimage import measure

import typhon as ty

from utils import *
Populating the interactive namespace from numpy and matplotlib
In [2]:
plt.style.use(ty.plots.styles.get('typhon'))

Get and open files

List of ASTER images on Feb 5

In [3]:
files0205 = ["TERRA_ASTER_processed_20200205_142515.nc",
             "TERRA_ASTER_processed_20200205_142524.nc",
             "TERRA_ASTER_processed_20200205_142533.nc",
             "TERRA_ASTER_processed_20200205_142542.nc",
             "TERRA_ASTER_processed_20200205_142551.nc",
             "TERRA_ASTER_processed_20200205_142600.nc",
             "TERRA_ASTER_processed_20200205_142608.nc",
             "TERRA_ASTER_processed_20200205_142617.nc",
             "TERRA_ASTER_processed_20200205_142626.nc",
             "TERRA_ASTER_processed_20200205_142635.nc",
             "TERRA_ASTER_processed_20200205_142644.nc",
             "TERRA_ASTER_processed_20200205_142653.nc",
             "TERRA_ASTER_processed_20200205_142702.nc",
             "TERRA_ASTER_processed_20200205_142710.nc",
             "TERRA_ASTER_processed_20200205_142719.nc",
             "TERRA_ASTER_processed_20200205_142728.nc",
             "TERRA_ASTER_processed_20200205_142737.nc",
             ]

Cloud sizes from single ASTER image

In [4]:
url = f"https://observations.ipsl.fr/thredds/dodsC/EUREC4A/SATELLITES/TERRA/ASTER/nc/{files0205[5]}"

store = xr.backends.PydapDataStore.open(url)
ds = xr.open_dataset(store)

extract cloud mask

In [5]:
ds.cloudmask
Out[5]:
Show/Hide data repr Show/Hide attributes
xarray.DataArray
'cloudmask'
  • pixel_alongtrack_VNIR: 4200
  • pixel_acrosstrack_VNIR: 4980
  • ...
    [20916000 values with dtype=float32]
    • description :
      ASTER cloud mask according to Werner et al., 2016.
      flag_meanings :
      confidently_clear probably _clear probably_cloudy confidently_cloudy
      flag_values :
      [2, 3, 4, 5]
      valid_range :
      [2.0, 5.0]
      long_name :
      ASTER cloud mask
      units :
      unitless
      _ChunkSizes :
      [840, 996]

    Convert the 4 flags to a binary cloud mask

    In [6]:
    cloudmask = np.zeros_like(ds.cloudmask)
    
    ## only trust "confidently cloudy" labels
    #clmask[ds.cloudmask==5] = 1
    
    ## cloudy =  "confidently cloudy" and "probably cloudy"
    cloudmask[np.logical_or(ds.cloudmask==4, ds.cloudmask==5)] = 1
    
    In [7]:
    # quicklook
    plt.imshow(cloudmask, cmap="Greys_r")
    
    Out[7]:
    <matplotlib.image.AxesImage at 0x2b771367ca10>

    filter cloud mask

    single pixel "clouds" rather reflect instrument noise and shall be excluded from an statistical analysis

    In [8]:
    # filtering takes quite a long time
    cloudmask_filtered = filter_cloudmask(cloudmask, threshold=4, connectivity=2)
    print(f"cloud fraction of clouds > 4 pixel: {cloudfraction(cloudmask_filtered)}")
    
    cloud fraction of clouds > 4 pixel: 0.08830655957161981
    
    In [9]:
    # cloud labeling based on 8-connectivity
    labels = measure.label(cloudmask_filtered, connectivity=2)
    props = measure.regionprops(labels)
    # extract cloud equivalent diameters and convert the unit to km.
    eqd = np.array([p.equivalent_diameter for p in props]) * 15 / 1000
    print(f"There are {len(eqd)} clouds detected")
    
    There are 4724 clouds detected
    

    Cloud top height

    In [18]:
    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(20, 7))
    ds.cloudtopheight.plot(ax=ax1)
    ds.cloudmask.plot(ax=ax2)
    
    Out[18]:
    <matplotlib.collections.QuadMesh at 0x2b772c4248d0>
    In [34]:
    # artificially replicate CTH estimates as they are derived from 90m resolution TIR bands
    cth = np.repeat(np.repeat(ds.cloudtopheight.data, 6, axis=0), 6, axis=1)
    np.shape(cth)
    
    Out[34]:
    (4200, 4980)
    In [36]:
    cth_means = [np.nanmean(cth[labels==l]) for l in range(1, labels.max() + 1)]
    

    Cluster index Iorg

    In [43]:
    iorg = iorg(cloudmask=cloudmask_filtered, connectivity=2)
    

    Cloud field properties on Feb 5

    pre-calculate cloud field properties for the images recorded on February 5 and save them to netCDF files.

    In [4]:
    def get_cloudstatistics(asterfile):
        print(f"processing file {asterfile}...")
        url = f"https://observations.ipsl.fr/thredds/dodsC/EUREC4A/SATELLITES/TERRA/ASTER/nc/{asterfile}"
        store = xr.backends.PydapDataStore.open(url)
        ds = xr.open_dataset(store)
        cloudmask = np.zeros_like(ds.cloudmask)
        cloudmask[ds.cloudmask==5] = 1
        #cloudmask[np.logical_or(ds.cloudmask==4, ds.cloudmask==5)] = 1
        cloudmask_filtered = filter_cloudmask(cloudmask, threshold=4, connectivity=2)
        # cloud fraction
        fraction = cloudfraction(cloudmask_filtered)
        # cloud sizes
        labels = measure.label(cloudmask_filtered, connectivity=2)
        props = measure.regionprops(labels)
        # extract cloud equivalent diameters and convert the unit to km.
        eqd = np.array([p.equivalent_diameter for p in props]) * 15 / 1000
        # average cloud top height per cloud
        cth = np.repeat(np.repeat(ds.cloudtopheight.data, 6, axis=0), 6, axis=1)
        cth_means = [np.nanmean(cth[labels==l]) for l in range(1, labels.max() + 1)]
        # cluster index iorg
        clustering = iorg(cloudmask=cloudmask_filtered, connectivity=2)
        return fraction, eqd, cth_means, clustering
    
    In [28]:
    %%time
    with multiprocessing.Pool(4) as p:
        stats = p.map(get_cloudstatistics, files0205)
    
    processing file TERRA_ASTER_processed_20200205_142515.nc...
    processing file TERRA_ASTER_processed_20200205_142551.nc...processing file TERRA_ASTER_processed_20200205_142533.nc...
    
    processing file TERRA_ASTER_processed_20200205_142608.nc...
    processing file TERRA_ASTER_processed_20200205_142524.nc...
    processing file TERRA_ASTER_processed_20200205_142542.nc...
    processing file TERRA_ASTER_processed_20200205_142600.nc...
    processing file TERRA_ASTER_processed_20200205_142617.nc...
    processing file TERRA_ASTER_processed_20200205_142626.nc...
    processing file TERRA_ASTER_processed_20200205_142644.nc...
    processing file TERRA_ASTER_processed_20200205_142702.nc...
    processing file TERRA_ASTER_processed_20200205_142710.nc...
    processing file TERRA_ASTER_processed_20200205_142653.nc...
    processing file TERRA_ASTER_processed_20200205_142719.nc...
    processing file TERRA_ASTER_processed_20200205_142737.nc...
    processing file TERRA_ASTER_processed_20200205_142635.nc...
    processing file TERRA_ASTER_processed_20200205_142728.nc...
    CPU times: user 1.27 s, sys: 486 ms, total: 1.76 s
    Wall time: 29min 51s
    
    In [30]:
    cf, sizes, cth, clustering = zip(*stats)
    

    Write cloud statistics to netCDF per image

    In [32]:
    for i, file in enumerate(files0205):
        ds = xr.Dataset(
            coords={"cloud_id": np.arange(sizes[i].size)},
            data_vars={
                "cloudfraction": cf[i],
                "cloudsizes": (("cloud_id",), sizes[i]),
                "cloudtopheight": (("cloud_id",), cth[i]),
                "Iorg": clustering[i],
            })
        ds.attrs = {"datetime": file[22:-3]}
        ds.to_netcdf(f"cloudstatistics_{file}")
    

    Test opening one file

    In [97]:
    test = xr.open_dataset(f"cloudstatistics_{files0205[5]}")
    test
    
    Out[97]:
    Show/Hide data repr Show/Hide attributes
    xarray.Dataset
      • cloud_id: 4724
      • cloud_id
        (cloud_id)
        int64
        0 1 2 3 4 ... 4720 4721 4722 4723
        array([   0,    1,    2, ..., 4721, 4722, 4723])
      • cloudfraction
        ()
        float64
        ...
        array(0.088307)
      • cloudsizes
        (cloud_id)
        float64
        ...
        array([0.304662, 0.20591 , 0.533361, ..., 0.101554, 0.117265, 0.056136])
      • cloudtopheight
        (cloud_id)
        float32
        ...
        array([ 823.48883,        nan, 1057.18   , ...,        nan,        nan,
                      nan], dtype=float32)
      • Iorg
        ()
        float64
        ...
        array(0.690929)
    • datetime :
      20200205_142600