Hi, I need to process some S-1 images and I've written a script using the snappy module in Python, which performs several operations on those images (the code I'm using is at the bottom of this post).
I basically loop through all the images in a directory, execute (almost) the same operations on each image and write the output products to another directory.
The problem I'm facing, though, is that Python doesn't clear its memory between iterations, so the script runs slower and slower (I have not run into any memory/heap error yet because my system has 64 GB of memory and I set java_max_mem in the snappy.ini file to 45 GB).
I also tried calling the garbage collector and disposing of the source products after writing them to file, but neither seemed to solve the issue.
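For reference, this is roughly what I tried (a minimal sketch; the System type is obtained through jpy, as in the commented-out lines of the full script below):

from snappy import jpy

System = jpy.get_type('java.lang.System')
# ... after writing each output product ...
source.dispose()   # release the product's resources
System.gc()        # ask the JVM garbage collector to run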
The real problem is that even if I reset the workspace (by calling %reset, since I'm using the IPython console in Spyder), I get the same memory usage, as if all the objects were still stored in the workspace.
Has anybody run into the same issue and can give me some advice on this?
Here is the code I’m using:
import os
import snappy
import numpy as np
import pandas as pd
import re
import datetime as dt
import csv
import sys
import time
from zipfile import ZipFile as zipf
from snappy import HashMap, ProductIO, GPF, jpy
#==============================================================================
# Function to get full list of Operator parameters with their descriptions
#==============================================================================
#def Op_help(op):
#    op_spi = snappy.GPF.getDefaultInstance().getOperatorSpiRegistry().getOperatorSpi(op)
#    print('Op name: {}'.format(op_spi.getOperatorDescriptor().getName()))
#    print('Op alias: {}\n'.format(op_spi.getOperatorDescriptor().getAlias()))
#    print('PARAMETERS:\n')
#    param_Desc = op_spi.getOperatorDescriptor().getParameterDescriptors()
#    for param in param_Desc:
#        print('{}: {}\nDefault Value: {}\nPossible other values: {}\n'.format(param.getName(), param.getDescription(), param.getDefaultValue(), list(param.getValueSet())))
#==============================================================================
# Function to convert meters in geo degrees
#==============================================================================
def m2geodeg(m):
    # Convert metres to geographic degrees at the equator
    # (WGS84 equatorial circumference / 360); e.g. m2geodeg(40.0) -> ~0.00036 deg
    fact = (6378137*np.pi*2)/360
    geodeg = m/fact
    return geodeg
#==============================================================================
# Function to get the resolution of the product bands from its Metadata
#==============================================================================
def getResolution(source):
    # Read each band's native resolution from the product metadata.
    # (Note: the metadata path below belongs to a Sentinel-2 Level-1C product;
    # this helper is not actually called in the loop further down.)
    k = 0
    bnames = source.getBandNames()
    pixres = np.zeros(len(bnames))
    els = source.getMetadataRoot().getElement('Level-1C_User_Product').getElement('General_Info').getElement('Product_Image_Characteristics').getElement('Spectral_Information_List').getElements()
    for el in els:
        if el.getAttributeString('physicalBand') in bnames:
            pixres[k] = el.getAttributeDouble('RESOLUTION')
            k += 1
    return pixres
st=time.time()
#System = jpy.get_type('java.lang.System')
#==============================================================================
# SET DIRECTORIES
#==============================================================================
wdir = r'C:\ProgramData\Anaconda\Spyder_DATA\Lavoro\Soil_Moisture_Monitoring' #predefined working directory
os.chdir(wdir)
InputFold = r'E:\Users\Davide_Marchegiani\SoilMoisture\Data\REMEDHUS\Sentinel_data\Sentinel_1\20170101-20170630_Ascending' #Input Data folder
files=os.listdir(InputFold)
OutputFold = InputFold + '_PROCESSED'
os.makedirs(OutputFold,exist_ok=True)
HashMap = jpy.get_type('java.util.HashMap')  # re-binds the HashMap imported from snappy above
#Get snappy operators
GPF.getDefaultInstance().getOperatorSpiRegistry().loadOperatorSpis()
# Get subset geometry from WKT file
wktfile = r'E:\Users\Davide_Marchegiani\SoilMoisture\Data\REMEDHUS\qgis_data\Remedhus_stations_subset_off1000m.csv'
with open(wktfile[:-4]+'_PLUS.csv') as csvfile:
    csv_read = csv.reader(csvfile)
    for row in csv_read:
        wkt = row[0]
geom = snappy.WKTReader().read(wkt)
#==============================================================================
# LOOP OVER THE SENTINEL DATA
#==============================================================================
#num = len(files)
num = 2
count = 1
for i in range(0, num):
    ### S-1 SLICE ASSEMBLY (MOSAIC)
    print('Iter = {} -- Begin algorithm'.format(i+1))
    sys.stdout.flush()
    # Skip slices that were already consumed by a previous SliceAssembly
    if count > 1:
        count -= 1
        continue
    ind = [m.start() for m in re.finditer('_', files[i])]
    datestart = files[i][ind[3]+1:ind[4]]
    # count = 1
    # Count how many consecutive files belong to the same acquisition
    if i+count != num:
        while files[i][:ind[4]-7] == files[i+count][:ind[4]-7]:
            count += 1
            if i+count == num:
                break
    if count > 1:
        products = jpy.array('org.esa.snap.core.datamodel.Product', count)
        for k in range(0, count):
            products[k] = ProductIO.readProduct(os.path.join(InputFold, files[i+k]))
        parameters = HashMap()
        source = GPF.createProduct('SliceAssembly', parameters, products)
        # prod_mosaic = source
        # ProductIO.writeProduct(prod_mosaic,'prova_MOSAIC', 'BEAM-DIMAP')
    else:
        source = ProductIO.readProduct(os.path.join(InputFold, files[i]))
    dateend = files[i+count-1][ind[4]+1:ind[5]]
    # print(i)
    #==========================================================================
    # SET OPERATIONS TO DO
    #==========================================================================
    ### SUBSET (coarse)
    parameters = HashMap()
    parameters.put('copyMetadata', True)
    parameters.put('geoRegion', geom)
    source = GPF.createProduct('Subset', parameters, source)
    # prod_subin = source
    # ProductIO.writeProduct(prod_subin,files[i][:-4], 'BEAM-DIMAP')
    ### APPLY ORBIT FILES
    parameters = HashMap()
    parameters.put('continueOnFail', True)
    parameters.put('polyDegree', 3)
    source = GPF.createProduct('Apply-Orbit-File', parameters, source)
    # prod_orb = source
    # ProductIO.writeProduct(prod_orb,'prova_ORBIT', 'BEAM-DIMAP')
    ### CALIBRATION
    bandnames = source.getBandNames()
    parameters = HashMap()
    parameters.put('outputSigmaBand', True)
    parameters.put('sourceBandNames', bandnames)
    source = GPF.createProduct('Calibration', parameters, source)
    # prod_calib = source
    # ProductIO.writeProduct(prod_calib,'prova_CALIB', 'BEAM-DIMAP')
    ### MULTILOOK
    nRg = 4
    nAz = nRg
    bandnames = source.getBandNames()
    parameters = HashMap()
    parameters.put('sourceBandNames', bandnames)
    parameters.put('nRgLooks', nRg)
    parameters.put('nAzLooks', nAz)
    source = GPF.createProduct('Multilook', parameters, source)
    # prod_multi = source
    # ProductIO.writeProduct(prod_multi,'prova_MULTI', 'BEAM-DIMAP')
    ### RANGE DOPPLER TERRAIN CORRECTION
    # pix_sp_m = float(nRg*10)
    bandnames = source.getBandNames()
    parameters = HashMap()
    parameters.put('sourceBandNames', bandnames)
    # parameters.put('demName', 'SRTM 1Sec HGT')
    parameters.put('imgResamplingMethod', 'NEAREST_NEIGHBOUR')
    # parameters.put('pixelSpacingInMeter', pix_sp_m)
    parameters.put('saveLocalIncidenceAngle', True)
    parameters.put('saveSelectedSourceBand', True)
    # parameters.put('applyRadiometricNormalization', True)
    parameters.put('saveSigmaNought', True)
    source = GPF.createProduct('Terrain-Correction', parameters, source)
    # prod_tercorr = source
    # ProductIO.writeProduct(prod_tercorr,'prova_TERCORR2', 'BEAM-DIMAP')
    ### SUBSET
    # Get subset geometry from WKT file
    with open(wktfile) as csvfile:
        csv_read = csv.reader(csvfile)
        for row in csv_read:
            wkt = row[0]
    geom = snappy.WKTReader().read(wkt)
    # Set SUBSET parameters
    parameters = HashMap()
    parameters.put('copyMetadata', True)
    parameters.put('geoRegion', geom)
    source = GPF.createProduct('Subset', parameters, source)
    # prod_subfin = source
    # ProductIO.writeProduct(prod_subfin,'prova_SUBSET_FIN', 'BEAM-DIMAP')
    ### WRITE PRODUCT
    print('Iter = {} -- Writing product'.format(i+1))
    sys.stdout.flush()
    ProductIO.writeProduct(source, os.path.join(OutputFold, files[i][:ind[4]]+'_'+dateend+files[i][ind[5]:-4]), 'BEAM-DIMAP')
    print('Product written. -- t = {:.2f}s'.format(time.time()-st))
    source.dispose()
    # System.gc()
print('Done! -- t = {:.2f}s'.format(time.time()-st))
Looking more deeply into this problem, I found out that:
1) the step that consumes the most memory seems to be ProductIO.writeProduct: considering only 2 iterations (I set num = 2), running all the lines above ProductIO.writeProduct(...) produces a memory consumption of approximately 750 MB, whereas including the ProductIO.writeProduct(...) line pushes it to about 8.5 GB.
(And if I execute the script again, without closing the console, memory usage climbs to 15.5 GB, so the memory seems to stack up.)
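In case it helps to reproduce the measurements, memory usage of the Python process can be checked from within Python like this (a minimal sketch, assuming the third-party psutil package is installed):

import os
import psutil

proc = psutil.Process(os.getpid())
print('RSS memory: {:.1f} MB'.format(proc.memory_info().rss / 1024**2))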
2) if I open a brand-new IPython console and just run
import snappy
and then
%reset
the behaviour is the same as described above: the memory is not cleared and stays at about 300 MB.
So it looks like this is an issue of the snappy module itself, which somehow doesn't clear its memory cache?!
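The only workaround I can think of is to isolate each image in its own Python process, so that all the JVM memory is released when the process exits. A rough, untested sketch (process_one.py is hypothetical here; it would contain the per-image processing from the script above and take the input file as a command-line argument):

import os
import subprocess
import sys

InputFold = r'E:\Users\Davide_Marchegiani\SoilMoisture\Data\REMEDHUS\Sentinel_data\Sentinel_1\20170101-20170630_Ascending'
for f in os.listdir(InputFold):
    # each image is processed by a fresh interpreter, so the JVM started by
    # snappy (and whatever it caches) is torn down after every image
    subprocess.check_call([sys.executable, 'process_one.py', os.path.join(InputFold, f)])

Is this the way to go, or is there a proper way to release the memory within a single process?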
Any help would be much appreciated! Thank you very much!