###############################################################################
# Program to download realtime hourly AQS data from TCEQ's website
#
# Written by: Qi Ying (qi.ying AT gmail.com)
#             Zachry Department of Civil and Environmental Engineering
#             Texas A&M University
#           
# Last updated: 3/14/2024
# 
#   3/14/2024: fixed a glitch in getting central standard time when daylight
#              saving time is in effect.
#
# This program is free software: you can redistribute it and/or modify it under 
# the terms of the GNU General Public License as published by the Free Software 
# Foundation, either version 3 of the License, or (at your option) any later 
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT 
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with 
# this program. If not, see <https://www.gnu.org/licenses/>.
###############################################################################
import pandas as pd
import numpy as np
import datetime
import pytz
import os

def is_number(s):
    """ 
    Check if an entry can be converted to a float

    :s: entry to test (string, number, or any other object)
    :return: True if float(s) succeeds, False otherwise
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that does not parse as a number (e.g. "abc");
        # TypeError: objects float() does not accept at all (e.g. None, list)
        return False
    return True


def get_one_site(icams_id,ih):
    """
    Download data from one CAMS site.

    The daily summary page of the site is read with pandas.read_html and the
    first table matching "Parameter Measured" is used. Any failure to fetch
    or parse the page, a missing species, a non-numeric entry, a negative
    value, or a column position outside the table yields -999.0 for the
    affected species.

    :icams_id: CAMS site number
    :ih: position of the table column to extract the data from
    (NOTE(review): column 0 holds the parameter name, so this presumably
    selects an hour of the day in local time -- confirm against the page
    layout)
    :return: 1d array with concentrations of O3, PM25, PM10, SO2, NO2, and CO
    gas concentrations are in ppb, except CO (ppm), and PM species
    are in concentrations of ug/m3. Missing values are -999.0.
    """
    # species to extract and their position in the output array
    nspec=6
    O3, PM25, PM10, SO2, NO2, CO = 0,1,2,3,4,5

    # every species starts out flagged as missing
    rconc1=np.full(nspec,-999.0)
    ssite="https://www.tceq.texas.gov/cgi-bin/compliance/monops/daily_summary.pl?cams=%d" % icams_id
    try:
        table_data = pd.read_html(ssite,match="Parameter Measured")
    except Exception:
        # best effort: network errors, HTTP errors, or no matching table all
        # mean "no data for this site" (KeyboardInterrupt still propagates)
        return rconc1
    if len(table_data)==0:
        return rconc1
    df=table_data[0]
    nrow0,ncol0=df.shape
    # the last 4 rows of the table are not scanned for parameter names
    nrow_data=nrow0-4

    # a column position outside the table cannot hold data
    if ih<0 or ih>=ncol0:
        return rconc1

    # parameter names that must match exactly -> position in the output array
    exact_names={"Nitrogen Dioxide":NO2,
                 "Ozone":O3,
                 "Sulfur Dioxide":SO2,
                 "Carbon Monoxide":CO}

    # map each species found in the first column to its table row; if a
    # species shows up more than once, the last matching row is used
    spec_rows={}
    for i in range(nrow_data):
        var=df.iloc[i,0]
        if not isinstance(var,str):
            # empty cells are read as NaN (a float); nothing to match
            continue
        if var in exact_names:
            spec_rows[exact_names[var]]=i
        elif "PM-2.5" in var:
            spec_rows[PM25]=i
        elif "PM-10" in var:
            spec_rows[PM10]=i

    for ispec,irow in spec_rows.items():
        var=df.iloc[irow,ih]
        if is_number(var):
            n=float(var)
            # negative readings are treated as missing
            if n<0:
                n=-999.0
            rconc1[ispec]=n
    # numeric-looking NaN entries are treated as missing too
    rconc1=np.nan_to_num(rconc1,nan=-999.0,copy=False)
    return rconc1

# main program starts here
if __name__ == "__main__":

    # when False the results are printed to stdout instead of being appended
    # to one text file per site under outputfile_prefix
    save_to_file=False
    outputfile_prefix="/home/qying/USAAQData/tceq/"

    # tab-delimited site list: column 0 is the label used for output (airs),
    # column 1 is a comma-separated list of CAMS site numbers.
    # ndmin=2 keeps the array two-dimensional even when the list has only one
    # line, so iterating over it always yields whole rows.
    sitelist="/home/qying/USAAQData/camslist.txt"
    cams=np.loadtxt(sitelist,dtype='str',delimiter='\t',ndmin=2)

    # current time at a fixed UTC-6 offset, i.e. standard time is used even
    # when daylight saving time is in effect
    now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=-6), 'CST'))
    # time stamp (yymmddhh) is labelled with the hour before the current one
    dstr = "%2d%02d%02d%02d" % (now.year-2000,now.month,now.day,now.hour-1)
    ihour=now.hour

    for row in cams:
        airs=row[0]
        cams_sites = [int(i) for i in row[1].split(',')]
        for icams in cams_sites:
            rdata=get_one_site(icams,ihour)
            # skip sites for which every species is missing (-999)
            if rdata.max()<0.0:
                continue
            if save_to_file:
                sfile=outputfile_prefix+"%s.txt" % airs
                # the context manager closes the file even if the write fails
                with open(sfile,'a') as f1:
                    f1.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n" % (dstr,
                                                              rdata[0],rdata[1],rdata[2],
                                                              rdata[3],rdata[4],rdata[5]))
            else:
                print("%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (dstr,airs,icams,
                                                              rdata[0],rdata[1],rdata[2],
                                                              rdata[3],rdata[4],rdata[5]))
