###############################################################################
# Program to download realtime hourly AQS data from TCEQ's website
#
# Written by: Qi Ying (qi.ying AT gmail.com)
#             Zachry Department of Civil and Environmental Engineering
#             Texas A&M University
#
# Last updated: 3/14/2024
#
# 3/14/2024: fixed a glitch in getting central standard time when daylight
#            saving time is in effect.
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see https://www.gnu.org/licenses/.
###############################################################################
import pandas as pd
import numpy as np
import datetime
import pytz
import os


def is_number(s):
    """
    Check if an entry is a number

    :s: entry to test (string, number, or any other object)
    :return: True if float(s) succeeds, False otherwise
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: None and other objects
        # that float() cannot convert at all.
        return False
    return True


def get_one_site(icams_id,ih):
    """
    Download data from one CAMS site.

    :icams_id: CAMS site number
    :ih: hour of the day (local time) to extract the data; used as the
         positional column index into the downloaded summary table, whose
         column 0 holds the parameter names
    :return: 1d array with concentrations of O3, PM25, PM10, SO2, NO2, and CO
             gas concentrations are in ppb, except CO (ppm), and PM species
             are in concentrations of ug/m3. Entries that are unavailable,
             non-numeric or negative are reported as -999.0. An array of all
             -999.0 is returned when the page cannot be downloaded or parsed.
    """
    # species to extract and their position in the output array
    nspec = 6
    O3, PM25, PM10, SO2, NO2, CO = 0, 1, 2, 3, 4, 5
    rconc1 = np.ones(nspec) * (-999.0)

    ssite = "https://www.tceq.texas.gov/cgi-bin/compliance/monops/daily_summary.pl?cams=%d" % icams_id
    try:
        table_data = pd.read_html(ssite, match="Parameter Measured")
    except Exception:
        # Best effort: any download/parse failure means "no data" for this
        # site. KeyboardInterrupt/SystemExit are deliberately not caught.
        return rconc1
    if len(table_data) == 0:
        return rconc1

    df = table_data[0]
    # The last four rows of the table are not scanned for parameter names.
    nrow_data = df.shape[0] - 4

    # Parameters identified by their exact name in column 0.
    exact_names = {
        "Nitrogen Dioxide": NO2,
        "Ozone": O3,
        "Sulfur Dioxide": SO2,
        "Carbon Monoxide": CO,
    }

    # spec_map[k] = table row holding species k, or -1 if the site lacks it.
    spec_map = [-1] * nspec
    for irow in range(nrow_data):
        var = df.iloc[irow, 0]
        if not isinstance(var, str):
            # NaN / non-text cells cannot name a parameter.
            continue
        if var in exact_names:
            spec_map[exact_names[var]] = irow
        elif "PM-2.5" in var:
            spec_map[PM25] = irow
        elif "PM-10" in var:
            spec_map[PM10] = irow

    for ispec, irow in enumerate(spec_map):
        if irow < 0:
            continue
        var = df.iloc[irow, ih]
        if is_number(var):
            n = float(var)
            if n < 0:
                n = -999.0
            rconc1[ispec] = n

    # Numeric-but-NaN cells pass is_number(); map them to the missing flag.
    rconc1 = np.nan_to_num(rconc1, nan=-999.0, copy=False)
    return rconc1


def main():
    """
    Fetch the most recent hourly data for every site in the site list and
    either append it to per-site files or print it to standard output.
    """
    save_to_file = False
    outputfile_prefix = "/home/qying/USAAQData/tceq/"
    sitelist = "/home/qying/USAAQData/camslist.txt"

    # ndmin=2 keeps the (site, column) layout even for a single-row list.
    cams = np.loadtxt(sitelist, dtype='str', delimiter='\t', ndmin=2)

    # Fixed UTC-6 offset: central standard time regardless of daylight saving.
    now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=-6), 'CST'))
    # The time stamp is one hour behind the current hour; subtracting a
    # timedelta rolls the date back correctly when the current hour is 0.
    stamp = now - datetime.timedelta(hours=1)
    dstr = "%2d%02d%02d%02d" % (stamp.year-2000, stamp.month, stamp.day, stamp.hour)
    # NOTE(review): when ihour is 0 the column read is the parameter-name
    # column, so every site reports no data and is skipped.
    ihour = now.hour

    for row in cams:
        airs = row[0]
        cams_sites = [int(i) for i in row[1].split(',')]
        for icams in cams_sites:
            rdata = get_one_site(icams, ihour)
            if max(rdata) < 0.0:
                # nothing valid reported by this monitor
                continue
            if save_to_file:
                sfile = outputfile_prefix + "%s.txt" % airs
                with open(sfile, 'a') as f1:
                    f1.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % (
                        dstr,
                        rdata[0], rdata[1], rdata[2],
                        rdata[3], rdata[4], rdata[5]))
            else:
                print("%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (
                    dstr, airs, icams,
                    rdata[0], rdata[1], rdata[2],
                    rdata[3], rdata[4], rdata[5]))


# main program starts here
if __name__ == "__main__":
    main()