forestatrisk
rasters as Cloud Optimized GeoTIFFs (COG)¶Notebook: cog.ipynb
A Cloud Optimized GeoTIFF (COG) is a regular GeoTIFF file, aimed at being hosted on a HTTP file server, with an internal organization that enables more efficient workflows on the cloud. It does this by leveraging the ability of clients issuing HTTP GET range requests to ask for just the parts of a file they need.
Some ressources on COGs:
The rasterio
Python module can be used to read COGs:
It is preferable to install the rasterio
module inside a Python virtual environment. This virtual environment can be created using pyenv
in a terminal:
# Install Python 3.6.10
pyenv install 3.6.10
pyenv local 3.6.10
# Creating the virtualenv from our current python version
pyenv virtualenv rasterio-cog
# Activate the environment (pyenv can use automatic tab completion)
pyenv activate 3.6.10/envs/rasterio-cog
# Install Python modules
pip install numpy
pip install matplotlib # To plot rasters
pip install jupyter # To run this jupyter notebook
pip install rasterio
We import the necessary Python modules and create a color palette for the raster map.
# Import modules
import os
import random
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
import rasterio
from rasterio import warp
# Set environmental variable for CA certificates
os.environ["CURL_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"
# Color palette
col = [(255, 165, 0, 255), (227, 26, 28, 255), (34, 139, 34, 255)]
colors = [(1, 1, 1, 0)] # transparent white for 0
cmax = 255.0 # float for division
for i in range(3):
col_class = tuple(np.array(col[i]) / cmax)
colors.append(col_class)
color_map = ListedColormap(colors)
# Get information on the geotiff file
print('Raster on forestatrisk.cirad.fr:')
filepath = 'https://forestatrisk.cirad.fr/tropics/tif/fcc_123_AFR_aea.tif'
with rasterio.open(filepath) as src:
print(src.profile)
Raster on forestatrisk.cirad.fr: {'driver': 'GTiff', 'dtype': 'uint8', 'nodata': 0.0, 'width': 280148, 'height': 163042, 'count': 1, 'crs': CRS.from_wkt('PROJCS["unknown",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["latitude_of_center",0],PARAMETER["longitude_of_center",25],PARAMETER["standard_parallel_1",20],PARAMETER["standard_parallel_2",-23],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'), 'transform': Affine(30.0, 0.0, -4446090.0, 0.0, -30.0, 1916550.0), 'blockxsize': 256, 'blockysize': 256, 'tiled': True, 'compress': 'deflate', 'interleave': 'band'}
# Get overview
with rasterio.open(filepath) as src:
oviews = src.overviews(1) # list of overviews from biggest to smallest
oview = oviews[-1] # let's look at the smallest overview
print('Decimation factor= {}'.format(oview))
# NOTE this is using a 'decimated read' (http://rasterio.readthedocs.io/en/latest/topics/resampling.html)
ovr = src.read(1, out_shape=(1, int(src.height // oview), int(src.width // oview)))
Decimation factor= 64
# Overview array
print('array type: ', type(ovr))
print(ovr.shape)
print(ovr)
array type: <class 'numpy.ndarray'> (2547, 4377) [[0 0 0 ... 0 0 0] [0 0 0 ... 0 0 0] [0 0 0 ... 0 0 0] ... [0 0 0 ... 0 0 0] [0 0 0 ... 0 0 0] [0 0 0 ... 0 0 0]]
# Plot overview
plt.imshow(ovr, cmap=color_map)
plt.xlabel('Column #')
plt.ylabel('Row #')
plt.show()
Another nice feature of COGs is that you can request a subset of the image and only that subset will be downloaded to your computer.
We write a small function to compute a Rasterio's window from a bounding box in Lon/Lat
# Function to return a window in rows and cols
def win_rows_cols(file_path, ll, ur):
"""
Return a window in the format (col_off, row_off, width, height)
from a window in longitude/latitude.
:param file_path: path to the COG file.
:param ll: list of lower-left corner coordinates in lon/lat.
:param ur: list of upper-right corner coordinates in lon/lat.
:retur: a tupple (col_off, row_off, width, height).
"""
# Get crs and transform from COG file
with rasterio.open(file_path) as src:
src_crs = src.crs
src_transform = src.transform
# Projected coordinates
X, Y = rasterio.warp.transform({'init': 'EPSG:4326'}, src_crs, [ll[0], ur[0]], [ll[1], ur[1]])
# Transform in row, col
(rows, cols) = rasterio.transform.rowcol(src_transform, X, Y)
ncols = cols[1]-cols[0]
nrows = rows[0]-rows[1] # ! rows[0] > rows[1]
return {"col_off": cols[0], "row_off": rows[1], "width": ncols, "height": nrows}
We use a bounding box over the Perinet reserve in Madagascar.
# Compute window in rows and cols
win = win_rows_cols(filepath, ll=[48.3890, -18.9784], ur=[48.5376, -18.8654])
print(win)
# Rasterio's window
window = rasterio.windows.Window(win["col_off"], win["row_off"],
win["width"], win["height"])
{'col_off': 228281, 'row_off': 137986, 'width': 513, 'height': 423}
# Plot raster using window
with rasterio.open(filepath) as src:
subset = src.read(1, window=window)
plt.imshow(subset, cmap=color_map)
plt.title(f'Subset\n{window}')
plt.xlabel('Column #')
plt.ylabel('Row #')
plt.show()
cmd = "gdal_translate -projwin 48.3890 -18.8654 48.5376 -18.9784 -projwin_srs EPSG:4326 \
/vsicurl/https://forestatrisk.cirad.fr/tropics/tif/fcc_123_AFR_aea.tif perinet.tif"
os.system(cmd)
0