NOTE: first you need a csv with the imagery you want to download
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt

api = SentinelAPI('username', 'password', 'https://scihub.copernicus.eu/dhus')
# search by polygon, time, and SciHub query keywords
footprint = geojson_to_wkt(read_geojson(r"AOI.json"))
products = api.query(footprint,
                     date=('startyyyymmdd', 'endyyyymmdd'),
                     platformname='Sentinel-2', producttype='S2MSI1C')
# convert to Pandas DataFrame
products_df = api.to_dataframe(products)
# export DataFrame to csv
products_df.to_csv(r"output.csv", index=False, header=True)
print("csv of records downloaded")
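The downloader further down keys off the title and uuid columns of that csv, so it's worth a quick sanity check that they survived the export. A minimal sketch, assuming the output.csv written above:

import csv

with open(r"output.csv", newline='') as f:
    reader = csv.DictReader(f)
    first_row = next(reader)
    # these two columns are what the download function relies on
    print(first_row['title'], first_row['uuid'])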
Now you can use that csv to download your data.
csv_folder is a folder with your csv(s) in it. I wrote this before realizing I should limit the number of images I was requesting at a time, so you could probably rework it so the csv doesn't need to live in a folder at all.
NOTE that you really can only have about 40 records per csv before risking products retrieved from the archive going offline again before you loop back around to download them. Splitting the query results into several small csvs, as sketched below, keeps each file under that limit.
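Here is a minimal sketch of that splitting step, assuming the products_df DataFrame from the query code above; the chunk size of 40 and the csvs output folder name are my own choices, not anything the API requires:

import os

CHUNK_SIZE = 40  # small enough that archived products shouldn't go offline again mid-loop
os.makedirs('csvs', exist_ok=True)
for i in range(0, len(products_df), CHUNK_SIZE):
    # write each chunk of up to 40 records to its own csv in the folder
    chunk_path = os.path.join('csvs', 'output_{}.csv'.format(i // CHUNK_SIZE))
    products_df.iloc[i:i + CHUNK_SIZE].to_csv(chunk_path, index=False, header=True)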
import csv
import datetime
import glob
import os
import time

from sentinelsat import SentinelAPI

def download(csv_folder, login_user, login_pass):
    # authenticate with the API
    api = SentinelAPI(login_user, login_pass, 'https://scihub.copernicus.eu/dhus/')
    # create the download directory if it doesn't exist
    download_dir = os.path.join(os.path.expanduser(csv_folder), 'downloads')
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)
    # find all CSV files in the folder
    filepath_pattern = os.path.join(os.path.expanduser(csv_folder), '**', '*.csv')
    csv_files = glob.glob(filepath_pattern, recursive=True)
    print(csv_files)
    # load all csv records into a list of dicts, one per scene
    scenes = []
    for csv_path in csv_files:
        with open(csv_path, newline='') as csv_file:
            scenes.extend(csv.DictReader(csv_file))
    print("[INFO] {} scenes found in all CSV files".format(len(scenes)))
    # find all downloaded files in the folder
    downloaded_files_pattern = os.path.join(download_dir, '**', '*.zip')
    downloaded_basenames = [os.path.basename(downloaded_file)
                            for downloaded_file in glob.glob(downloaded_files_pattern, recursive=True)]
    # get the list of items not yet downloaded
    all_scene_titles = {scene['title'] for scene in scenes}
    downloaded_scene_titles = {os.path.splitext(downloaded_basename)[0]
                               for downloaded_basename in downloaded_basenames}
    print("[INFO] {} downloaded scenes found in folder".format(len(downloaded_scene_titles)))
    scene_ids = [scene['uuid'] for scene in scenes
                 if scene['title'] in all_scene_titles.difference(downloaded_scene_titles)]
    print("[INFO] {} scenes remain to download".format(len(scene_ids)))
    if len(scene_ids) == 0:
        print("[INFO] No more files left to download from csv files")
        return False
    # loop through each scene id and attempt to download.
    # Try twice, then move on to the next one; the failed request has already
    # triggered retrieval from the archive, so a later pass can pick it up.
    for idx, scene_id in enumerate(scene_ids):
        product_info = api.get_product_odata(scene_id)
        print("[INFO] Product {} ({}/{}); Online: {}".format(
            product_info['id'],
            idx + 1,
            len(scene_ids),
            product_info['Online']))
        attempts = 0
        while attempts < 2:
            try:
                api.download(product_info['id'], download_dir)
                break  # if successful, move on to the next scene
            except Exception:
                print("[ERROR] Request failed, sleeping for 31 mins before retrying (current time: {})".format(datetime.datetime.now()))
                time.sleep(60 * 31)
                attempts += 1
    return True
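Since the function returns True after each pass that still had scenes to try, and False once everything in the csvs has been downloaded, you can just call it in a loop. A minimal usage sketch (the folder name and credentials are placeholders):

while download('csv_folder', 'username', 'password'):
    print("[INFO] Completed a pass, looping back for scenes pulled from the archive...")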