2006-11-10 Python Data Wrapping

From Datafedwiki

Jump to: navigation, search

Back to DataFed Development Events

Grid Data: Steps for creating phyton script for wrapping grids:
1. Create empty cubes
2. Read data from text files and fill into cubes
3. Query cubes

[edit] GRID DATA

EDGAR Example

Input file looks like:
-------------------------------------------------------
EMISSIONS ON GRID FOR CARBON MONOXIDE IN 1990

Calculated by EDGAR on 03/05/02 by laeedg
calculation name : G: EDGV32 CO: 90 ALL (EXCL AIR)
(this file created on 03/05/02 11:17:35)
-------------------------------------------------------
EDGAR Inventory 1x1 ALL PROCESSGROUPS
CARBON MONOXIDE 1990 annual # cells: 12032 (<>0)
Values: min: 2.18E+02 max: 5.23E+09 sum: 8.47E+11
Units : kg CO (FMM)/yr avg: 7.04E+07
-------------------------------------------------------
-180,47,2.82E+04
-180,66,8.49E+05
-180,68,8.49E+05
-179,47,2.82E+04


Variables :

[location of the folder containing the text files.]
file_loc = "EmissionData\\WA_4_07\\EmissionInventories\\EDGAR32"

[data lines in the text file starts after number of dash lines]
data_start_after_dashline_num = 3

separator = [",", ";"] # field separator in data file
parameter = ["co", "nmv", "nox", "so2"]
datetime_year = [1990, 1995, 2000]

datetime_num = len(datetime_year)
lat_grid_size = 181
lon_grid_size = 360

cube_folder = "\\\\datafedweb1.seas.wustl.edu\\catalog\\EDGAR\\CUBES\\"
min_date = "1990-01-01"
max_date = "2000-01-01"

cat_time_dim = LinearTimeDimension(TimePeriodicity("year", 5, DateTime.Parse(min_date), max_date))
cat_lat_dim = LinearDoubleDimension("lat", "degrees_north", -90, 90, lat_grid_size)
cat_lon_dim = LinearDoubleDimension("lon", "degrees_east", -180, 179, lon_grid_size)


1. CREATING EMPTY CUBES FROM TEXT FILES:

  • def compile(param_name): [to create a cube, param_name = parameter name e.g. co, so2, nox, nmv listed in parameter list variable (parameter one per cube at a time)]
  • def create_cube(filename): [creates an empty cube; cube file name would be e.g. EDGAR_CO.CUBE]


def compile(param_name):
  param_name = param_name.lower()
  if param_name in parameter:
   cube_file = "CUBES\\EDGAR_" + param_name + ".CUBE"
   #cube_file = "CUBES\\EDGAR_" + param_name + "_lto.CUBE"
   create_cube(cube_file)
   hc = HyperCubeFile.Open(cube_file);
  try:
    for time in xrange(0, datetime_num):
    sr = get_file_stream(time, param_name)
    write_one_datetime(hc, sr, time)
  finally:
   hc.Dispose()
  else:
    print "'" + param_name + "' is not a valid parameter!\nChoose From List : " + repr(parameter)]]


def create_cube(filename):
  if File.Exists(filename):
   File.Delete(filename)
  dims = (0, lat_grid_size, lon_grid_size)
  hc = HyperCube4B(dims, filename)
  hc.Dispose()



2. READ DATA FROM TEXT FILES AND WRITE TO CUBES:
Finding data file:

  • def get_file_stream(year_index, param): [get the text file data based on the year (e.g. 1990-2000) and parameter name (e.g. param = co, so2 etc)]

Writing data in the cube.

  • def write_one_datetime(hc, sr, datetime_index): and def fill_dice(sr): [ write vaules in the cube for one year at a time for the lat, lon ]


def get_file_stream(year_index, param):
  try:
   yr = datetime_year[year_index]
   year = str(yr)
   # scanning only the files/folders with chars '_ant' in their name
   if yr > 1999:
    file_dir = file_loc + "\\"+"FT" + year
    dir_pattern = year +"_"+ param + "_ant"
    if param == "nmv":
     dir_pattern = year +"_"+ param + "oc_ant"
# parameter 'nmv' is 'nmvco' in the directory name e.g. 'edgar_32ft2000_nmvoc_ant_tcm32-22048'
   else:
    file_dir = file_loc + "\\" + year
    year = year[2:4]
    dir_pattern = "ant" + year + param
    file_in = ""
   for dir in os.listdir(file_dir):
   if re.search(dir_pattern, dir):
    file_in = file_dir+"\\"+dir
    break

   file_pattern = dir_pattern +"\."
   #file_pattern = dir_pattern +"_lto"
   #file_pattern2 = dir_pattern +"-lto"


   for fl in os.listdir(file_in):
    #if re.search(file_pattern, fl) or re.search(file_pattern2, fl):
    if re.search(file_pattern, fl):
     file_name = file_in +"\\"+ fl
     break
    print file_name
   sr = StreamReader(file_name)

    return sr
   except:
   print "Year = " + str(datetime_year[year_index]) + " : Param = " + param + " - Thrown Exception in get_file_stream. Error in opening file."

def write_one_datetime(hc, sr, datetime_index):
  dims = hc.GetDims()
  start = Array.CreateInstance(Int32, len(dims))
  g = fill_dice(sr)
  start[0] = datetime_index
  hc.Write(start, g)

def fill_dice(sr):
  g = Array.CreateInstance(System.Single, 1, lat_grid_size, lon_grid_size)
  cnt = 0
  while cnt < data_start_after_dashline_num: # seek: --------------------------------------------
   line = sr.ReadLine()
   if line == None:
   cnt = 3
   raise System.Exception("unexpected end in stream when looking for dash line.")
   if line.StartsWith("------"):
    cnt += 1
  vt = 0

  state = 0
  while state == 0:
   line = sr.ReadLine()
   if line == None:
    state = 1
   else:
    for sep in separator:
    if re.search(sep, line) != None:
    splitter = sep
    break
   val = line.split(splitter)
   vy = int(val[0]) + 180 # longitude
   vx = int(val[1]) + 90 # latitude

   g[ vt, vx, vy ] = Single.Parse(val[2])
  return g


3. QUERYING CUBES:

  • def query(c): [Gets the correct cube file for the c.param_abbr and prepares for reading]
  • def read_cube(hcscr, c): [ Read the cube ]
  • def check_lat_lon(c): [ check if the lat, lon are out of range ]
  • def check_year(c): [ check if year is out of range ]
  • def make_grid(data_cube, c): [ prepare the grid ]

def query(c):
  if do_trace == True:
  trace("EDGAR","start 1") # from dacc
  param = c.param_abbr
  cube_file = cube_folder + "EDGAR_" + param + ".CUBE"
  hcscr = HyperCubeFile.Open(cube_file);
  try:
   data = read_cube(hcscr, c)
   grid = make_grid(data, c)
  finally:
   hcscr.Dispose()
    return grid

def read_cube(hcscr, c):
  lens = hcscr.GetDims()
  start = Array.CreateInstance(Int32, len(lens))

  check_year(c)
  check_lat_lon(c)

  time_idx_min = cat_time_dim.GetClosestIndex(DateTime.Parse(c.time_min))
  time_idx_max = cat_time_dim.GetClosestIndex(DateTime.Parse(c.time_max))
  lat_idx_min = cat_lat_dim.GetClosestIndex(Double.Parse(c.lat_min))
  lat_idx_max = cat_lat_dim.GetClosestIndex(Double.Parse(c.lat_max))
  lon_idx_min = cat_lon_dim.GetClosestIndex(Double.Parse(c.lon_min))
  lon_idx_max = cat_lon_dim.GetClosestIndex(Double.Parse(c.lon_max))

  start[0] = time_idx_min
  start[1] = lat_idx_min
  start[2] = lon_idx_min
  lens[0] = time_idx_max-time_idx_min+1
  lens[1] = lat_idx_max-lat_idx_min+1
  lens[2] = lon_idx_max-lon_idx_min+1

  data = hcscr.Read(start, lens)

  return data

def check_lat_lon(c):
  lat_min = cat_lat_dim.GetCoordinate(0)
  lat_max = cat_lat_dim.GetLastCoordinate()
  lon_min = cat_lon_dim.GetCoordinate(0)
  lon_max = cat_lon_dim.GetLastCoordinate()

  if Double.Parse(c.lat_min) < lat_min:
   c.lat_min = str(lat_min)
  if Double.Parse(c.lat_max) > lat_max:
   c.lat_max = str(lat_max)
  if Double.Parse(c.lon_min) < lon_min:
   c.lon_min = str(lon_min)
  if Double.Parse(c.lon_max) > lon_max:
   c.lon_max = str(lon_max)

def check_year(c):
  time_min = DateTime.Parse(c.time_min).Year - 1970
  time_max = DateTime.Parse(c.time_max).Year - 1970
  min_dt = time.localtime( cat_time_dim.GetCoordinate(0) / 1000 )
  max_dt = time.localtime( (cat_time_dim.GetCoordinate(datetime_num - 1)) / 1000)
  min_dt_year = min_dt[0]
  max_dt_year = max_dt[0]

  if time_min != min_dt_year:
   c.time_min = min_date
  if time_max != max_dt_year:
   c.time_max = max_date

def make_grid(data_cube, c):
  time_dim = LinearTimeDimension(TimePeriodicity("year", 5, DateTime.Parse(min_date), max_date));
  lat_dim = LinearDoubleDimension("lat", "degrees_north", Double.Parse(c.lat_min), Double.Parse(c.lat_max), data_cube.GetLength(1))
  lon_dim = LinearDoubleDimension("lon", "degrees_east", Double.Parse(c.lon_min), Double.Parse(c.lon_max), data_cube.GetLength(2))
  dims = System.Array.CreateArray(Dimension, [time_dim, lat_dim, lon_dim])
  return HttpDataCubeM(c.param_abbr, c.param_unit, data_cube, dims);

[edit] POINT DATA

NPRI

1. Create Location Table (mvLocation) in Access DB
2. Create the Pollutant Table (mvSubsRele) in Access DB
3. Query on the tables

Variables:
connection string to the access database
connectstr="Provider=Microsoft.Jet.OLEDB.4.0;Data Source=\\\\datafedweb1\\catalog\\NPRI\\DATA\\Canada_NPRI.mdb"

SQL statements
sqlStmt_map = """ SELECT mvLocation.NPRI_ID AS loc_code, mvLocation.LATI_DEC as lat, mvLocation.LONG_DEC as lon, mvLocation.city, mvLocation.province, mvSubsRele.AirSta_V as [[param_abbr]], mvSubsRele.Total_Air as total FROM mvLocation INNER JOIN mvSubsRele ON mvLocation.NPRI_ID = mvSubsRele.NPRI_ID WHERE ( (mvLocation.LATI_DEC) IS NOT NULL ) AND ( (mvLocation.LONG_DEC) IS NOT NULL ) AND ( (mvSubsRele.AirSta_V) IS NOT NULL ) AND (mvSubsRele.ReportYear = [[yyyy]]) AND (mvSubsRele.CAS_Number = [[query_param]]); """

sqlStmt_time = """ SELECT mvSubsRele.REPORTYEAR, mvSubsRele.AirSta_V as [[param_abbr]] FROM mvSubsRele WHERE ( mvSubsRele.CAS_NUMBER = [[query_param]]' ) AND ( mvSubsRele.NPRI_ID = [['loc_code]]') AND ( (mvSubsRele.AirSta_V) IS NOT NULL) AND (mvSubsRele.ReportYear between [[yyyy_min]] and [[yyyy_max]] ) ORDER BY mvSubsRele.REPORTYEAR ASC """

READING DATA:

  • def query(c):

- open the connection to the database
- read data based on the sql statement

def query(c):
  dbconn= System.Data.OleDb.OleDbConnection(connectstr)
  dbconn.Open()
  try:
   if c.time_min == c.time_max:
   sqlStmt = sqlStmt_map
   sqlStmt = sqlStmt.Replace("param_abbr", c.param_abbr)
   sqlStmt = sqlStmt.Replace("yyyy", str(c.time_min.Year))
   sqlStmt = sqlStmt.Replace("query_param", c.query_param)
  else:
   sqlStmt = sqlStmt_time    sqlStmt = sqlStmt.Replace("param_abbr", c.param_abbr)
   sqlStmt = sqlStmt.Replace("query_param", c.query_param)
   sqlStmt = sqlStmt.Replace("loc_code", c.loc_code)
   sqlStmt = sqlStmt.Replace("yyyy_min", str(c.time_min.Year))
   sqlStmt = sqlStmt.Replace("yyyy_max", str(c.time_max.Year))

  da = System.Data.OleDb.OleDbDataAdapter(sqlStmt, dbconn)
  ds = System.Data.DataSet("NPRI")
  gr = ds.Tables.Add("granule")

  da.Fill(ds, "granule")

  if c.time_min == c.time_max:
   pass
  else:
   gr.Columns.Add("datetime", System.DateTime)
   for r in gr.Rows:
    r["datetime"] = DateTime(r["reportyear"], 1, 1)
   gr.Columns.Remove("reportyear")

  finally:
   dbconn.Close()

  return ds;

[edit] IMAGE DATA


NAAPS_AUST

  • NAAPS_AUST.xml : Defined (i) the parameters (ii) python file to be called
  • NAAPS_AUST.page : Defined (i) constants and (ii) xml files for map view
  • NAAPS_AUST_map.xml : Defined the services


NAAPS_AUST.py :

  • Data for the DataSet are declared at the top. These are the sets of data need to be changed when we use this python code for a new datasent.

The dataset is prepared by the function: def query(c):
The legend is prepared by the function: def legend(c):


Variables:
data_lookup = {
  "AOT":{
   "src_margin_bottom":466,
   "src_margin_left":20,
   "src_margin_right":426,
   "src_margin_top":60
  },
  "DUST":{
   "src_margin_bottom":60,
   "src_margin_left":20,
   "src_margin_right":426,
   "src_margin_top":466
  },
  "SMOK":{
   "src_margin_bottom":60,
   "src_margin_left":426,
   "src_margin_right":20,
   "src_margin_top":466
  },
  "SULF":{
   "src_margin_bottom":466,
   "src_margin_left":426,
   "src_margin_right":20,
   "src_margin_top":60
  }
}

legend_lookup = {
  "AOT":{
   "bottom":405,
   "left":20,
   "right":420,
   "top":352
  },
  "DUST":{
   "bottom":5,
   "left":20,
   "right":420,
   "top":755
  },
  "SMOK":{
   "bottom":5,
   "left":425,
   "right":20,
   "top":755
  },
  "SULF":{
   "bottom":405,
   "left":425,
   "right":20,
   "top":352
  }
}

template_url="http://www.nrlmry.navy.mil/aerosol/globaer/ops_01/australia/[[yyyy]][[mm]]/[[yyyy]][[mm]][[dd]][[hh]]_globaer_ops_australia.gif"

constant_info_lookup = {
  "src_image_width":364,
  "src_image_height":284,
  "src_lat_min":-45,
  "src_lat_max":-10,
  "src_lon_min":110,
  "src_lon_max":160,   "dataset_abbr":"NAAPS_AUST",
  "bgcolor":"rgb(230,245,250)",
  "transp_colors":"RGB(255,255,255);RGB(255,0,0)",
  "image_format":"image/png",
  "template_url":template_url
}


def query(c):
  params = {}
  params.update(constant_info_lookup)
  params.update(data_lookup[c.param_abbr])
  ds = dacc.make_datetemplate_image_dataset(c, params)

  return ds

def legend(c):
  real_url = dacc.replace_datetime_templates(template_url, c.datetime)
  leg_url = dacc.make_crop_url(real_url, legend_lookup[c.param_abbr])
  return Uri(leg_url)

REUSABILITY:

  • Reusable functions are defined in the dacc module.

1. The map view is prepared by function: make_datetemplate_image_dataset(c, param_lookup)
2. The legend is prepared by function: make_crop_url(source_url, legend_lookup):
3. These functions are reused in the other NAAPS datasets.


def make_datetemplate_image_dataset(c, param_lookup):
  ds = create_image_dataset("DateTemplate")
  gr = ds.Tables["granule"]
  r = gr.NewRow()
  r["param_abbr"] = c.param_abbr
  r["datetime"] = c.datetime.ToString("yyyy-MM-ddTHH:mm:ss")
  r["template_url"] = param_lookup["template_url"]
  r["source_url"] = replace_datetime_templates(param_lookup["template_url"], c.datetime)
  fill_row_from_dict(r, param_lookup)
  gr.Rows.Add(r)
  return ds

def make_crop_url(source_url, legend_lookup):
  crop_template = "http://datafed2.seas.wustl.edu/dvoy_services/crop.wsfl?left=[[left]]&right=[[right]]&top=[[top]]&bottom=[[bottom]]&source="
  leg_url = crop_template + source_url
  for pp in legend_lookup:
   leg_url = leg_url.Replace("" + pp + "", legend_lookup[pp].ToString())
  return leg_url

Personal tools
Workspaces
Clicky Web Analytics