2006-11-10 Python Data Wrapping
From Datafedwiki
Back to DataFed Development Events
Grid Data:
Steps for creating phyton script for wrapping grids:
1. Create empty cubes
2. Read data from text files and fill into cubes
3. Query cubes
[edit] GRID DATA
EDGAR Example
Input file looks like:
-------------------------------------------------------
EMISSIONS ON GRID FOR CARBON MONOXIDE IN 1990
Calculated by EDGAR on 03/05/02 by laeedg
calculation name : G: EDGV32 CO: 90 ALL (EXCL AIR)
(this file created on 03/05/02 11:17:35)
-------------------------------------------------------
EDGAR Inventory 1x1 ALL PROCESSGROUPS
CARBON MONOXIDE 1990 annual # cells: 12032 (<>0)
Values: min: 2.18E+02 max: 5.23E+09 sum: 8.47E+11
Units : kg CO (FMM)/yr avg: 7.04E+07
-------------------------------------------------------
-180,47,2.82E+04
-180,66,8.49E+05
-180,68,8.49E+05
-179,47,2.82E+04
Variables :
[location of the folder containing the text files.]
file_loc = "EmissionData\\WA_4_07\\EmissionInventories\\EDGAR32"
[data lines in the text file starts after number of dash lines]
data_start_after_dashline_num = 3
separator = [",", ";"] # field separator in data file
parameter = ["co", "nmv", "nox", "so2"]
datetime_year = [1990, 1995, 2000]
datetime_num = len(datetime_year)
lat_grid_size = 181
lon_grid_size = 360
cube_folder = "\\\\datafedweb1.seas.wustl.edu\\catalog\\EDGAR\\CUBES\\"
min_date = "1990-01-01"
max_date = "2000-01-01"
cat_time_dim = LinearTimeDimension(TimePeriodicity("year", 5, DateTime.Parse(min_date), max_date))
cat_lat_dim = LinearDoubleDimension("lat", "degrees_north", -90, 90, lat_grid_size)
cat_lon_dim = LinearDoubleDimension("lon", "degrees_east", -180, 179, lon_grid_size)
1. CREATING EMPTY CUBES FROM TEXT FILES:
- def compile(param_name): [to create a cube, param_name = parameter name e.g. co, so2, nox, nmv listed in parameter list variable (parameter one per cube at a time)]
- def create_cube(filename): [creates an empty cube; cube file name would be e.g. EDGAR_CO.CUBE]
def compile(param_name):
param_name = param_name.lower()
if param_name in parameter:
cube_file = "CUBES\\EDGAR_" + param_name + ".CUBE"
#cube_file = "CUBES\\EDGAR_" + param_name + "_lto.CUBE"
create_cube(cube_file)
hc = HyperCubeFile.Open(cube_file);
try:
for time in xrange(0, datetime_num):
sr = get_file_stream(time, param_name)
write_one_datetime(hc, sr, time)
finally:
hc.Dispose()
else:
print "'" + param_name + "' is not a valid parameter!\nChoose From List : " + repr(parameter)]]
def create_cube(filename):
if File.Exists(filename):
File.Delete(filename)
dims = (0, lat_grid_size, lon_grid_size)
hc = HyperCube4B(dims, filename)
hc.Dispose()
2. READ DATA FROM TEXT FILES AND WRITE TO CUBES:
Finding data file:
- def get_file_stream(year_index, param): [get the text file data based on the year (e.g. 1990-2000) and parameter name (e.g. param = co, so2 etc)]
Writing data in the cube.
- def write_one_datetime(hc, sr, datetime_index): and def fill_dice(sr): [ write vaules in the cube for one year at a time for the lat, lon ]
def get_file_stream(year_index, param):
try:
yr = datetime_year[year_index]
year = str(yr)
# scanning only the files/folders with chars '_ant' in their name
if yr > 1999:
file_dir = file_loc + "\\"+"FT" + year
dir_pattern = year +"_"+ param + "_ant"
if param == "nmv":
dir_pattern = year +"_"+ param + "oc_ant"
# parameter 'nmv' is 'nmvco' in the directory name e.g. 'edgar_32ft2000_nmvoc_ant_tcm32-22048'
else:
file_dir = file_loc + "\\" + year
year = year[2:4]
dir_pattern = "ant" + year + param
file_in = ""
for dir in os.listdir(file_dir):
if re.search(dir_pattern, dir):
file_in = file_dir+"\\"+dir
break
file_pattern = dir_pattern +"\."
#file_pattern = dir_pattern +"_lto"
#file_pattern2 = dir_pattern +"-lto"
for fl in os.listdir(file_in):
#if re.search(file_pattern, fl) or re.search(file_pattern2, fl):
if re.search(file_pattern, fl):
file_name = file_in +"\\"+ fl
break
print file_name
sr = StreamReader(file_name)
return sr
except:
print "Year = " + str(datetime_year[year_index]) + " : Param = " + param + " - Thrown Exception in get_file_stream. Error in opening file."
def write_one_datetime(hc, sr, datetime_index):
dims = hc.GetDims()
start = Array.CreateInstance(Int32, len(dims))
g = fill_dice(sr)
start[0] = datetime_index
hc.Write(start, g)
def fill_dice(sr):
g = Array.CreateInstance(System.Single, 1, lat_grid_size, lon_grid_size)
cnt = 0
while cnt < data_start_after_dashline_num: # seek: --------------------------------------------
line = sr.ReadLine()
if line == None:
cnt = 3
raise System.Exception("unexpected end in stream when looking for dash line.")
if line.StartsWith("------"):
cnt += 1
vt = 0
state = 0
while state == 0:
line = sr.ReadLine()
if line == None:
state = 1
else:
for sep in separator:
if re.search(sep, line) != None:
splitter = sep
break
val = line.split(splitter)
vy = int(val[0]) + 180 # longitude
vx = int(val[1]) + 90 # latitude
g[ vt, vx, vy ] = Single.Parse(val[2])
return g
3. QUERYING CUBES:
- def query(c): [Gets the correct cube file for the c.param_abbr and prepares for reading]
- def read_cube(hcscr, c): [ Read the cube ]
- def check_lat_lon(c): [ check if the lat, lon are out of range ]
- def check_year(c): [ check if year is out of range ]
- def make_grid(data_cube, c): [ prepare the grid ]
def query(c):
if do_trace == True:
trace("EDGAR","start 1") # from dacc
param = c.param_abbr
cube_file = cube_folder + "EDGAR_" + param + ".CUBE"
hcscr = HyperCubeFile.Open(cube_file);
try:
data = read_cube(hcscr, c)
grid = make_grid(data, c)
finally:
hcscr.Dispose()
return grid
def read_cube(hcscr, c):
lens = hcscr.GetDims()
start = Array.CreateInstance(Int32, len(lens))
check_year(c)
check_lat_lon(c)
time_idx_min = cat_time_dim.GetClosestIndex(DateTime.Parse(c.time_min))
time_idx_max = cat_time_dim.GetClosestIndex(DateTime.Parse(c.time_max))
lat_idx_min = cat_lat_dim.GetClosestIndex(Double.Parse(c.lat_min))
lat_idx_max = cat_lat_dim.GetClosestIndex(Double.Parse(c.lat_max))
lon_idx_min = cat_lon_dim.GetClosestIndex(Double.Parse(c.lon_min))
lon_idx_max = cat_lon_dim.GetClosestIndex(Double.Parse(c.lon_max))
start[0] = time_idx_min
start[1] = lat_idx_min
start[2] = lon_idx_min
lens[0] = time_idx_max-time_idx_min+1
lens[1] = lat_idx_max-lat_idx_min+1
lens[2] = lon_idx_max-lon_idx_min+1
data = hcscr.Read(start, lens)
return data
def check_lat_lon(c):
lat_min = cat_lat_dim.GetCoordinate(0)
lat_max = cat_lat_dim.GetLastCoordinate()
lon_min = cat_lon_dim.GetCoordinate(0)
lon_max = cat_lon_dim.GetLastCoordinate()
if Double.Parse(c.lat_min) < lat_min:
c.lat_min = str(lat_min)
if Double.Parse(c.lat_max) > lat_max:
c.lat_max = str(lat_max)
if Double.Parse(c.lon_min) < lon_min:
c.lon_min = str(lon_min)
if Double.Parse(c.lon_max) > lon_max:
c.lon_max = str(lon_max)
def check_year(c):
time_min = DateTime.Parse(c.time_min).Year - 1970
time_max = DateTime.Parse(c.time_max).Year - 1970
min_dt = time.localtime( cat_time_dim.GetCoordinate(0) / 1000 )
max_dt = time.localtime( (cat_time_dim.GetCoordinate(datetime_num - 1)) / 1000)
min_dt_year = min_dt[0]
max_dt_year = max_dt[0]
if time_min != min_dt_year:
c.time_min = min_date
if time_max != max_dt_year:
c.time_max = max_date
def make_grid(data_cube, c):
time_dim = LinearTimeDimension(TimePeriodicity("year", 5, DateTime.Parse(min_date), max_date));
lat_dim = LinearDoubleDimension("lat", "degrees_north", Double.Parse(c.lat_min), Double.Parse(c.lat_max), data_cube.GetLength(1))
lon_dim = LinearDoubleDimension("lon", "degrees_east", Double.Parse(c.lon_min), Double.Parse(c.lon_max), data_cube.GetLength(2))
dims = System.Array.CreateArray(Dimension, [time_dim, lat_dim, lon_dim])
return HttpDataCubeM(c.param_abbr, c.param_unit, data_cube, dims);
[edit] POINT DATA
NPRI
1. Create Location Table (mvLocation) in Access DB
2. Create the Pollutant Table (mvSubsRele) in Access DB
3. Query on the tables
Variables:
connection string to the access database
connectstr="Provider=Microsoft.Jet.OLEDB.4.0;Data Source=\\\\datafedweb1\\catalog\\NPRI\\DATA\\Canada_NPRI.mdb"
SQL statements
sqlStmt_map = """
SELECT
mvLocation.NPRI_ID AS loc_code,
mvLocation.LATI_DEC as lat,
mvLocation.LONG_DEC as lon,
mvLocation.city,
mvLocation.province,
mvSubsRele.AirSta_V as [[param_abbr]],
mvSubsRele.Total_Air as total
FROM mvLocation
INNER JOIN mvSubsRele ON mvLocation.NPRI_ID = mvSubsRele.NPRI_ID
WHERE
( (mvLocation.LATI_DEC) IS NOT NULL )
AND ( (mvLocation.LONG_DEC) IS NOT NULL )
AND ( (mvSubsRele.AirSta_V) IS NOT NULL )
AND (mvSubsRele.ReportYear = [[yyyy]])
AND (mvSubsRele.CAS_Number = [[query_param]]);
"""
sqlStmt_time = """
SELECT mvSubsRele.REPORTYEAR, mvSubsRele.AirSta_V as [[param_abbr]]
FROM mvSubsRele
WHERE ( mvSubsRele.CAS_NUMBER = [[query_param]]' )
AND ( mvSubsRele.NPRI_ID = [['loc_code]]')
AND ( (mvSubsRele.AirSta_V) IS NOT NULL)
AND (mvSubsRele.ReportYear between [[yyyy_min]] and [[yyyy_max]] )
ORDER BY mvSubsRele.REPORTYEAR ASC
"""
READING DATA:
- def query(c):
- open the connection to the database
- read data based on the sql statement
def query(c):
dbconn= System.Data.OleDb.OleDbConnection(connectstr)
dbconn.Open()
try:
if c.time_min == c.time_max:
sqlStmt = sqlStmt_map
sqlStmt = sqlStmt.Replace("param_abbr", c.param_abbr)
sqlStmt = sqlStmt.Replace("yyyy", str(c.time_min.Year))
sqlStmt = sqlStmt.Replace("query_param", c.query_param)
else:
sqlStmt = sqlStmt_time
sqlStmt = sqlStmt.Replace("param_abbr", c.param_abbr)
sqlStmt = sqlStmt.Replace("query_param", c.query_param)
sqlStmt = sqlStmt.Replace("loc_code", c.loc_code)
sqlStmt = sqlStmt.Replace("yyyy_min", str(c.time_min.Year))
sqlStmt = sqlStmt.Replace("yyyy_max", str(c.time_max.Year))
da = System.Data.OleDb.OleDbDataAdapter(sqlStmt, dbconn)
ds = System.Data.DataSet("NPRI")
gr = ds.Tables.Add("granule")
da.Fill(ds, "granule")
if c.time_min == c.time_max:
pass
else:
gr.Columns.Add("datetime", System.DateTime)
for r in gr.Rows:
r["datetime"] = DateTime(r["reportyear"], 1, 1)
gr.Columns.Remove("reportyear")
finally:
dbconn.Close()
return ds;
[edit] IMAGE DATA
NAAPS_AUST
- NAAPS_AUST.xml : Defined (i) the parameters (ii) python file to be called
- NAAPS_AUST.page : Defined (i) constants and (ii) xml files for map view
- NAAPS_AUST_map.xml : Defined the services
NAAPS_AUST.py :
- Data for the DataSet are declared at the top. These are the sets of data need to be changed when we use this python code for a new datasent.
The dataset is prepared by the function: def query(c):
The legend is prepared by the function: def legend(c):
Variables:
data_lookup = {
"AOT":{
"src_margin_bottom":466,
"src_margin_left":20,
"src_margin_right":426,
"src_margin_top":60
},
"DUST":{
"src_margin_bottom":60,
"src_margin_left":20,
"src_margin_right":426,
"src_margin_top":466
},
"SMOK":{
"src_margin_bottom":60,
"src_margin_left":426,
"src_margin_right":20,
"src_margin_top":466
},
"SULF":{
"src_margin_bottom":466,
"src_margin_left":426,
"src_margin_right":20,
"src_margin_top":60
}
}
legend_lookup = {
"AOT":{
"bottom":405,
"left":20,
"right":420,
"top":352
},
"DUST":{
"bottom":5,
"left":20,
"right":420,
"top":755
},
"SMOK":{
"bottom":5,
"left":425,
"right":20,
"top":755
},
"SULF":{
"bottom":405,
"left":425,
"right":20,
"top":352
}
}
template_url="http://www.nrlmry.navy.mil/aerosol/globaer/ops_01/australia/[[yyyy]][[mm]]/[[yyyy]][[mm]][[dd]][[hh]]_globaer_ops_australia.gif"
constant_info_lookup = {
"src_image_width":364,
"src_image_height":284,
"src_lat_min":-45,
"src_lat_max":-10,
"src_lon_min":110,
"src_lon_max":160,
"dataset_abbr":"NAAPS_AUST",
"bgcolor":"rgb(230,245,250)",
"transp_colors":"RGB(255,255,255);RGB(255,0,0)",
"image_format":"image/png",
"template_url":template_url
}
def query(c):
params = {}
params.update(constant_info_lookup)
params.update(data_lookup[c.param_abbr])
ds = dacc.make_datetemplate_image_dataset(c, params)
return ds
def legend(c):
real_url = dacc.replace_datetime_templates(template_url, c.datetime)
leg_url = dacc.make_crop_url(real_url, legend_lookup[c.param_abbr])
return Uri(leg_url)
REUSABILITY:
- Reusable functions are defined in the dacc module.
1. The map view is prepared by function: make_datetemplate_image_dataset(c, param_lookup)
2. The legend is prepared by function: make_crop_url(source_url, legend_lookup):
3. These functions are reused in the other NAAPS datasets.
def make_datetemplate_image_dataset(c, param_lookup):
ds = create_image_dataset("DateTemplate")
gr = ds.Tables["granule"]
r = gr.NewRow()
r["param_abbr"] = c.param_abbr
r["datetime"] = c.datetime.ToString("yyyy-MM-ddTHH:mm:ss")
r["template_url"] = param_lookup["template_url"]
r["source_url"] = replace_datetime_templates(param_lookup["template_url"], c.datetime)
fill_row_from_dict(r, param_lookup)
gr.Rows.Add(r)
return ds
def make_crop_url(source_url, legend_lookup):
crop_template = "http://datafed2.seas.wustl.edu/dvoy_services/crop.wsfl?left=[[left]]&right=[[right]]&top=[[top]]&bottom=[[bottom]]&source="
leg_url = crop_template + source_url
for pp in legend_lookup:
leg_url = leg_url.Replace("" + pp + "", legend_lookup[pp].ToString())
return leg_url
Categories: DevEvents | Atomic | Yymmdd
