Example of a TOOCAN data handling¶
This notebook contains some codes in python useful for the handling of the TOOCAN datasets.
In a first part of this notebook, you will find the following visualizations :
lifetime duration distributions
CDF of the maximum surface reached by the MCS at 235K
In a second part, an animation is generated to show the result of the MCS segmentation performed by the TOOCAN algorithm :
For this example, the region selected is AMERICA for the 2020 period and over a region of interest given by longitudes in the range -106° -40° and latitudes between -30° and 30°.
Libraries¶
#-*- coding:UTF-8 -*-
import sys
import numpy as np
import netCDF4
import pandas as pd
import xarray as xr
import glob
from Load_TOOCAN import *
import random
import itertools
import matplotlib.ticker as mticker
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import matplotlib.gridspec as gridspec
import matplotlib.colors as mcolors
from matplotlib.patches import Polygon
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.mpl.ticker as cticker
from cartopy.util import add_cyclic_point
import seaborn as sns
import re
from datetime import date, time, datetime, timedelta
import os
from os import listdir
from os.path import isfile, join
from alive_progress import alive_bar
from tqdm.notebook import tqdm
import math
import time
from PIL import Image
from IPython.display import HTML
from watermark import watermark
# "Scheduler" dask
from dask.distributed import LocalCluster, Client
# Structures de données
import dask
import dask.bag as db
import dask.array as da
import dask.dataframe as dd
# Others
from dask.diagnostics import ProgressBar
import ctypes
version = 'TOOCAN_v2.07'
region = 'AMERICA'
year = '2020'
# restricted region of interest
if (region=='AFRICA'):
lonmin, lonmax, latmin, latmax = -40, 40, -30, 30
if (region=='AMERICA'):
lonmin, lonmax, latmin, latmax = -106, -40, -30, 30
if (region=='EASTERNPACIFIC'):
lonmin, lonmax, latmin, latmax = 185, 250, -30, 30
if (region=='INDIA'):
lonmin, lonmax, latmin, latmax = 40, 101, -30, 30
if (region=='WESTERNPACIFIC'):
lonmin, lonmax, latmin, latmax = 101, 185, -30, 30
path = '/bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07'
pathImage_TOOCAN = f'{path}/{region}/{year}'
Load the MCS parameters into a class object¶
All_FileTOOCAN = sorted(glob.glob(pathImage_TOOCAN+'/FINAL_FileTracking/*.dat.gz'))
for ifileTOOCAN in np.arange(0,len(All_FileTOOCAN),1):
FileTOOCAN = All_FileTOOCAN[ifileTOOCAN]
if(ifileTOOCAN == 0) :
data = load_TOOCAN(FileTOOCAN)
else:
data = data + load_TOOCAN(FileTOOCAN)
/bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200101-20200131.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200201-20200229.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200301-20200331.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200401-20200430.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200501-20200531.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200601-20200630.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200701-20200731.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200801-20200831.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200901-20200930.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20201001-20201031.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20201101-20201130.dat.gz /bdd/MT_WORKSPACE/MCS/TOOCAN/TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20201201-20201231.dat.gz
Selection of the MCS correctly identified and tracked¶
Selection of the MCS overpassing the region of interest for the 2020 period and respecting the quality control criteria. The quality control criteria is here set at 11108, meaning :
- First digit = 1 -> MCS initiation not impacted by a recovery of the tracking following successive missing images
- Second digit = 1 -> MCS dissipation not impacted by an interruption of the tracking due to successive missing images
- Third digit = 1 -> MCS not impacted by the GEO image boundaries or missing pixels.
- Two last digits = 08 -> maximum number of missing images authorized during the whole MCS life cycle
init_len = len(data)
data = [data[iMCS] for iMCS in np.arange(0,len(data),1) if(int(data[iMCS].qc_MCS) <= 11108)]
print(' After selection of the MCS correctly identified and tracked: ', init_len, '->', len(data), 'MCS')
After selection of the MCS correctly identified and tracked: 441740 -> 428125 MCS
Selection of the MCSs which occured within the region of interest¶
data_MCSselection = []
for iMCS in np.arange(0,len(data),1):
id = [ilife for ilife in np.arange(0,data[iMCS].duration*2,1,dtype=int) if( (data[iMCS].clusters.lon[ilife] >= lonmin) & (data[iMCS].clusters.lon[ilife] <= lonmax) & (data[iMCS].clusters.lat[ilife] >= latmin) & (data[iMCS].clusters.lat[ilife] <= latmax)) ]
if(np.size(id) > 0):
data_MCSselection.append(data[iMCS])
print (' After Selection of the MCSs which occured within the region of interest:',len(data),'->', len(data_MCSselection))
After Selection of the MCSs which occured within the region of interest: 428125 -> 312681
Get the lifetime duration, Smax, Scum variables of all the segmented MCS previously selected¶
LifetimeDuration = [data_MCSselection[iMCS].duration for iMCS in np.arange(0,len(data_MCSselection),1)]
Smax = [data_MCSselection[iMCS].surfmaxkm2_235K for iMCS in np.arange(0,len(data_MCSselection),1)]
Scum = [data_MCSselection[iMCS].surfcumkm2_235K for iMCS in np.arange(0,len(data_MCSselection),1)]
MCS Lifetime duration distribution for the 2020 period¶
BINS = 2*np.arange(141)
fig, ax1 = plt.subplots(figsize=(10,8), dpi=120)
n, bins, patches = ax1.hist(LifetimeDuration, BINS, histtype='step', lw=2, color='mediumblue', label='lifetime duration [h]',log=1, rwidth=15, linewidth=3)
ax1.set_title(f'MCS Lifetime duration distribution\nregion: {region}: longitudes=[{lonmin}° ; {lonmax}°], latitudes=[{latmin}° ; {latmax}°] - {year}', fontsize=12)
ax1.set_xlabel('Lifetime duration [h]' ,fontsize=15)
ax1.set_ylabel('Population', fontsize=15)
ax1.xaxis.set_major_locator(mticker.MultipleLocator(4))
ax1.set_xlim(0, 80)
ax1.grid(True, linewidth=0.5, which='minor', axis='x')
ax1.grid(True, linewidth=1.5, which='major', axis='x')
ax1.grid(True, linewidth=0.5, which='minor', axis='y')
ax1.grid(True, linewidth=1.5, which='major', axis='y')
file=f"histo_lifetime_{region}_{year}.png"
plt.savefig(file, dpi=150, bbox_inches='tight', facecolor='w', transparent=False)
# fig.clf()
# plt.close()
CDF of the maximum surface reached by the MCS at 235K¶
BINS = 100*np.arange(1750000)
fig, ax1 = plt.subplots(figsize=(10,8), dpi=120)
n, bins, patches = ax1.hist(Smax, BINS, histtype='step', lw=2, color='mediumblue', rwidth=15, cumulative=1, linewidth=3)
ax1.set_title(f'Maximum surface in km² at 235K\nregion: {region}: longitudes=[{lonmin}° ; {lonmax}°], latitudes=[{latmin}° ; {latmax}°] - {year}\n', fontsize=12)
ax1.set_xlabel(f'Smax [km2]', fontsize=15)
ax1.set_ylabel('Population', fontsize=15)
ax1.set_xscale('log')
ax1.set_xlim(100, 1000000)
# ax1.set_ylim(0, 1)
ax1.grid(True, linewidth=0.5, which='minor', axis='x')
ax1.grid(True, linewidth=1, which='major', axis='x')
ax1.grid(True, linewidth=0.5, which='minor', axis='y')
ax1.grid(True, linewidth=1, which='major', axis='y')
file=f"cdf_Smax_{region}_{year}.png"
plt.savefig(file, dpi=150, bbox_inches='tight', facecolor='w', transparent=False)
# fig.clf()
# plt.close()
TOOCAN images and animation¶
version='TOOCAN_v2.07'
year=2020
month=2
day=1
path_filetracking = f'{version}/{region}/{year}/FINAL_FileTracking'
path_imseg = f'{version}/{region}/{year}/{year}'+'_'+str(month).zfill(2)+'_'+str(day).zfill(2)
print('path_filetracking: ',path_filetracking)
print(' path_imseg: ',path_imseg)
path_filetracking: TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking path_imseg: TOOCAN_v2.07/AMERICA/2020/2020_02_01
¤ Load TOOCAN data from tracking file (.dat) in data variable¶
def open_imsegTOOCAN_NETCDF(fileSeg_TOOCAN):
fh = netCDF4.Dataset(fileSeg_TOOCAN,'r')
MCS_Label = fh.variables['MCS_label'][:,:,:]
lat = fh.variables['latitude'][:]
lon = fh.variables['longitude'][:]
mat_lon = np.zeros((len(lat),len(lon)))
mat_lat = np.zeros((len(lat),len(lon)))
MCS_Label = MCS_Label[0,:,:]
for i in range(0,np.shape(mat_lon)[0],1): mat_lon[i,:] = lon
for j in range(0,np.shape(mat_lat)[1],1): mat_lat[:,j] = lat
fh.close()
del(fh)
return (MCS_Label,mat_lon,mat_lat)
All_FileTOOCAN = sorted(glob.glob(f'{path_filetracking}/*.dat.gz'))
for ifileTOOCAN in np.arange(0,len(All_FileTOOCAN),1):
FileTOOCAN = All_FileTOOCAN[ifileTOOCAN]
if(ifileTOOCAN == 0) :
data = load_TOOCAN(FileTOOCAN)
else:
data = data + load_TOOCAN(FileTOOCAN)
# Keep only MCS with a good quality flag
data = [data[iMCS] for iMCS in np.arange(0,len(data),1) if(int(data[iMCS].qc_MCS) <= 11108)]
# and in the region of interest
data_MCSselection = []
for iMCS in np.arange(0,len(data),1):
id = [ilife for ilife in np.arange(0,data[iMCS].duration*2,1,dtype=int) if( (data[iMCS].clusters.lon[ilife] >= lonmin) & (data[iMCS].clusters.lon[ilife] <= lonmax) & (data[iMCS].clusters.lat[ilife] >= latmin) & (data[iMCS].clusters.lat[ilife] <= latmax)) ]
if(np.size(id) > 0):
data_MCSselection.append(data[iMCS])
print (' After Selection of the MCSs which occured within the region of interest:', len(data),'->', len(data_MCSselection))
TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200101-20200131.dat.gz TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200201-20200229.dat.gz TOOCAN_v2.07/AMERICA/2020/FINAL_FileTracking/TOOCAN-AMERICA-20200301-20200331.dat.gz After Selection of the MCSs which occured within the region of interest: 88719 -> 62522
# computation of the number of the day since 01/01/1970
offset = datetime(1970,1,1,0)
dateTest = datetime(int(year), int(month),int(day), 0)
dateTest_since1970 = dateTest-offset
jday_since1970 = dateTest_since1970.days
jday_since1970
# Selection of the MCS which occured at the given date
idMCS = [iMCS for iMCS in np.arange(0,len(data),1) if(int(data[iMCS].Utime_Init) <= jday_since1970 and int(data[iMCS].Utime_End) >= jday_since1970)]
Generation of images with MCSs trajectories an entire day¶
# Random for the colors between 0 and 255
tabrandom = np.random.randint(255, size=100000000)
tabrandom[0] = -999
for slot in np.arange(1,49,1):
filesegImages = path_imseg+f'/ImageSegTOOCAN_{year}'+str(month).zfill(2)+str(day).zfill(2)+'-'+str(slot).zfill(2)+'.nc'
####################################################
#Â Open a segmented image produced by TOOCAN
####################################################
(imtoocan, matlon_TOOCAN, matlat_TOOCAN) = open_imsegTOOCAN_NETCDF(filesegImages)
imtoocan = imtoocan.astype(int)
imtoocan = imtoocan.data
ind = np.where((matlon_TOOCAN >= lonmin) & (matlon_TOOCAN <= lonmax) & (matlat_TOOCAN >= latmin) & (matlat_TOOCAN <= latmax))
colmin_TOOCAN = np.min(ind[0])
colmax_TOOCAN = np.max(ind[0])
linemin_TOOCAN = np.min(ind[1])
linemax_TOOCAN = np.max(ind[1])
imtoocan = imtoocan[colmin_TOOCAN:colmax_TOOCAN+1, linemin_TOOCAN:linemax_TOOCAN+1]
matlat_TOOCAN = matlat_TOOCAN[colmin_TOOCAN:colmax_TOOCAN+1, linemin_TOOCAN:linemax_TOOCAN+1]
matlon_TOOCAN = matlon_TOOCAN[colmin_TOOCAN:colmax_TOOCAN+1, linemin_TOOCAN:linemax_TOOCAN+1]
ind = np.where(imtoocan > 0)
imtoocan2 = np.zeros((np.shape(imtoocan)))
imtoocan2[ind[0][:],ind[1][:]]= tabrandom[imtoocan[ind[0][:],ind[1][:]]]
Hmasked = np.ma.masked_where(imtoocan2 <= 0,imtoocan2) # Mask pixels with a value of zero
#######################################################################
# Selection of the MCS which occured at the given date and at the slot
#######################################################################
# for iMCS in np.arange(0,len(idMCS),1) :
# if(round( 100*(data[idMCS[iMCS]].Utime_Init - round(data[idMCS[iMCS]].Utime_Init))) == slot) :
# idMCS_slot = iMCS
idMCS_slot = [iMCS for iMCS in np.arange(0,len(idMCS),1) if(round( 100*(data[idMCS[iMCS]].Utime_Init - round(data[idMCS[iMCS]].Utime_Init))) == slot)]
###################
# figure
###################
fig = plt.figure(figsize=(20, 5),facecolor='w')
layout = (1,1)
plot_1 = plt.subplot2grid(layout, (0, 0))
map = Basemap(projection='merc', lat_0=0, lon_0=(lonmax+lonmin)/2, area_thresh=0.1, llcrnrlon=lonmin, llcrnrlat=latmin, urcrnrlon=lonmax, urcrnrlat=latmax, ax=plot_1)
map.shadedrelief()
parallels = np.arange(-40.,40.,10.)
map.drawparallels(parallels,labels=[True,True,True,True], color='grey')
meridians = np.arange(-360.,360.,20.)
map.drawmeridians(meridians,labels=[True,True,True,True], color='grey')
x, y = map(matlon_TOOCAN, matlat_TOOCAN)
# r=map.pcolormesh(x, y, Hmasked,shading='flat', cmap = plt.cm.jet,vmin =0,vmax =255)
r=map.pcolormesh(x, y, Hmasked, shading='auto', cmap=plt.cm.jet, vmin=0, vmax=255) # shading='auto','nearest', or 'gouraud', or set rcParams['pcolor.shading']
plot_1.contour(x, y, imtoocan2, levels=np.arange(255), colors='white',linewidths=0.2)
plot_1.contour(x, y, imtoocan2, levels=np.arange(255), colors='grey',linewidths=0.18)
#########################################################
# Loop on the number of MCS to plot the MCS trajectories
#########################################################
for iMCS in np.arange(0,len(idMCS),1):
#######################################################################
# Selection of the MCS which occured at the given date and at the slot
#######################################################################
idslot = [ilife for ilife in np.arange(0,data[idMCS[iMCS]].duration*2,1,dtype=int) if( (round( 100*(data[idMCS[iMCS]].clusters.Utime[ilife] - round(data[idMCS[iMCS]].clusters.Utime[ilife]))) == slot) & (round(data[idMCS[iMCS]].clusters.Utime[ilife]) == jday_since1970)) ]
if(np.size(idslot) > 0):
lons = data[idMCS[iMCS]].clusters.lon[0:idslot[0]+1]
lats = data[idMCS[iMCS]].clusters.lat[0:idslot[0]+1]
xtraj, ytraj = map(lons, lats)
map.plot(xtraj, ytraj, marker='None',color='black', markersize=0.5, lw=0.5)
xtraj, ytraj = map(lons[idslot[0]], lats[idslot[0]])
map.plot(xtraj, ytraj, marker='o',color='black',markersize=1, lw=1)
if(np.size(idslot) > 1):
import sys
sys.exit("Error message")
if(idslot[0] > 0):
lons = data[idMCS[iMCS]].clusters.lon[idslot[0]-1:idslot[0]+1]
lats = data[idMCS[iMCS]].clusters.lat[idslot[0]-1:idslot[0]+1]
xtraj, ytraj = map(lons, lats)
plt.title(f'TOOCAN MCS tracking {year}'+str(month).zfill(2)+str(day).zfill(2)+'_'+str(slot).zfill(2)+'\n')
file=f"TOOCAN_{region}_"+str(year).zfill(4)+str(month).zfill(2)+str(day).zfill(2)+"_"+str(slot).zfill(2)+".png"
plt.savefig('images_toocan_png/'+file, dpi=150, bbox_inches='tight', facecolor='w', transparent=False)
fig.clf()
plt.close()
First image of 2020-02-01 (slot 01 corresponding to T00:00)¶
from IPython.display import Image as IPDimage
IPDimage(filename='images_toocan_png/TOOCAN_AMERICA_20200201_01.png')