libdsproc3  2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dsproc_csv2cds.c
Go to the documentation of this file.
1 /*******************************************************************************
2 *
3 * COPYRIGHT (C) 2013 Battelle Memorial Institute. All Rights Reserved.
4 *
5 ********************************************************************************
6 *
7 * Authors:
8 * name: Brian Ermold
9 * phone: (509) 375-2277
10 * email: brian.ermold@pnl.gov
11 *
12 ********************************************************************************
13 *
14 * REPOSITORY INFORMATION:
15 * $Revision: 67119 $
16 * $Author: ermold $
17 * $Date: 2016-01-27 19:22:37 +0000 (Wed, 27 Jan 2016) $
18 * $State:$
19 *
20 ********************************************************************************
21 *
22 * NOTE: DOXYGEN is used to generate documentation for this file.
23 *
24 *******************************************************************************/
25 
26 /** @file dsproc_csv2cds.c
27  * CSV to CDS Mapping Functions.
28  */
29 
30 #include "dsproc3.h"
31 
32 /*******************************************************************************
33  * Private Data and Functions
34  */
35 /** @privatesection */
36 
37 /*******************************************************************************
38  * Public Functions
39  */
40 /** @publicsection */
41 
42 /**
43  * Add a string to double conversion function to a CSV2CDS Mapping structure.
44  *
45  * The str_to_dbl function must return the converted value as a double and
46  * set *status to non-zero on success, or to 0 for an invalid input string.
47  *
48  * @param map pointer to the CSV2CDS Map array, terminated by an entry whose csv_name is NULL
49  * @param csv_name name of the column in the csv file
50  * @param str_to_dbl function used to convert csv string to a double.
51  *
52  * @retval 1 if the specified csv_name was found
53  * @retval 0 if the specified csv_name was not found
54  */
56  CSV2CDSMap *map,
57  const char *csv_name,
58  double (*str_to_dbl)(const char *strval, int *status))
59 {
60  int mi;
61 
62  for (mi = 0; map[mi].csv_name; ++mi) {
63  if (strcmp(csv_name, map[mi].csv_name) == 0) {
64  map[mi].str_to_dbl = str_to_dbl;
65  return(1);
66  }
67  }
68 
69  return(0);
70 }
71 
72 /**
73  * Map CSVParser data to variables in a CDSGroup.
74  *
75  * If an error occurs in this function it will be appended to the log and
76  * error mail messages, and the process status will be set appropriately.
77  *
78  * @param csv pointer to the CSVParser structure
79  * @param csv_start index of the start record in the CSVParser structure
80  * @param csv_count number of records to map; a value <= 0, or one larger than the number of records remaining after csv_start, maps all remaining records
81  * @param map pointer to the CSV2CDS Map structure
82  * @param cds pointer to the CDSGroup structure
83  * @param cds_start index of the start record in the CDSGroup
84  * @param flags control flags
85  *
86  * @retval 1 if successful
87  * @retval 0 if an error occurred
88  */
90  CSVParser *csv,
91  int csv_start,
92  int csv_count,
93  CSV2CDSMap *map,
94  CDSGroup *cds,
95  int cds_start,
96  int flags)
97 {
98  int *indexes;
99  int status;
100  int ri, idx;
101 
102  if (csv_count <= 0 ||
103  csv_count > csv->nrecs - csv_start) {
104 
105  csv_count = csv->nrecs - csv_start;
106  }
107 
108  indexes = (int *)calloc(csv_count, sizeof(int));
109  if (!indexes) {
110 
112  "Memory allocation error creating CSV to CDS mapping index\n");
113 
115 
116  return(0);
117  }
118 
119  for (ri = 0, idx = csv_start; ri < csv_count; ++ri, ++idx) {
120  indexes[ri] = idx;
121  }
122 
124  csv, indexes, csv_count, map, cds, cds_start, flags);
125 
126  free(indexes);
127 
128  return(status);
129 }
130 
/**
 *  Macro used by dsproc_map_csv_to_cds_by_index.
 *
 *  Converts csv_count CSV string values into numeric values of type data_t
 *  and stores them through data_p, advancing data_p by one element for every
 *  record processed.
 *
 *  For each record the string csv_strvals[csv_indexes[ri]] is examined:
 *
 *    - A NULL pointer, an empty string, or a string that exactly matches
 *      (strcmp) an entry in the NULL terminated csv_missings list is treated
 *      as missing, and the value pointed to by miss_p is stored.
 *
 *    - Otherwise, if csv_str_map is set, the string is compared ignoring case
 *      (strcasecmp) against each entry of the table (terminated by an entry
 *      with a NULL strval) and the dblval of the first match is stored.
 *      A string with no match is an error.
 *
 *    - Otherwise, if csv_str_to_dbl is set, its return value is stored and
 *      a status of zero is an error.
 *
 *    - Otherwise the result of ato_func(string) is stored.
 *
 *  On error the message is logged, the process status is set to
 *  DSPROC_ECSV2CDS, the missing value buffer (cds_missing.vp) is released
 *  the same way every other exit path of the calling function releases it,
 *  and the *enclosing function* returns 0.
 *
 *  The macro expands to a single statement and relies on these names being
 *  declared in the enclosing scope: csv, csv_name, csv_indexes, csv_count,
 *  csv_strvals, csv_missings, csv_str_map, csv_str_to_dbl, cds_missing,
 *  status, is_missing, ri, csvi, mvi, and smi.
 *
 *  @param  data_t    C type of the output values
 *  @param  data_p    lvalue pointer to the next output element (is advanced)
 *  @param  miss_p    pointer to the missing value to store for missing input
 *  @param  ato_func  fallback string conversion function (e.g. atoi, atof)
 */
#define CSV_MAP_TO_CDS(data_t, data_p, miss_p, ato_func) \
do { \
    for (ri = 0; ri < csv_count; ++ri) { \
        csvi = csv_indexes[ri]; \
        /* Flag NULL, empty, and explicitly listed missing strings */ \
        is_missing = 0; \
        if (!csv_strvals[csvi] || *csv_strvals[csvi] == '\0') { \
            is_missing = 1; \
        } \
        else if (csv_missings) { \
            for (mvi = 0; csv_missings[mvi]; ++mvi) { \
                if (strcmp(csv_strvals[csvi], csv_missings[mvi]) == 0) { \
                    is_missing = 1; \
                    break; \
                } \
            } \
        } \
        if (is_missing) { \
            *(data_p)++ = *(miss_p); \
        } \
        else if (csv_str_map) { \
            /* Table lookup: case insensitive string -> double */ \
            for (smi = 0; csv_str_map[smi].strval; ++smi) { \
                if (strcasecmp(csv_strvals[csvi], csv_str_map[smi].strval) == 0) { \
                    *(data_p)++ = (data_t)csv_str_map[smi].dblval; \
                    break; \
                } \
            } \
            /* Landing on the table terminator means nothing matched */ \
            if (!csv_str_map[smi].strval) { \
                ERROR( DSPROC_LIB_NAME, \
                    "Invalid '%s' value '%s' in file: %s\n", \
                    csv_name, csv_strvals[csvi], csv->file_name); \
                dsproc_set_status(DSPROC_ECSV2CDS); \
                free(cds_missing.vp); \
                return(0); \
            } \
        } \
        else if (csv_str_to_dbl) { \
            /* User supplied conversion function reports failure via status */ \
            *(data_p)++ = (data_t)csv_str_to_dbl(csv_strvals[csvi], &status); \
            if (!status) { \
                ERROR( DSPROC_LIB_NAME, \
                    "Invalid '%s' value '%s' in file: %s\n", \
                    csv_name, csv_strvals[csvi], csv->file_name); \
                dsproc_set_status(DSPROC_ECSV2CDS); \
                free(cds_missing.vp); \
                return(0); \
            } \
        } \
        else { \
            *(data_p)++ = (data_t)ato_func(csv_strvals[csvi]); \
        } \
    } \
} while (0)
223 
224 /**
225  * Map CSVParser data to variables in a CDSGroup using CSV record indexes.
226  *
227  * If an error occurs in this function it will be appended to the log and
228  * error mail messages, and the process status will be set appropriately.
229  *
230  * @param csv pointer to the CSVParser structure
231  * @param csv_indexes indexes of the CSV records
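232  * @param csv_count number of entries in csv_indexes (the array is expected to be non-empty: csv_indexes[0] is referenced when reporting the start index)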
233  * @param map pointer to the CSV2CDS Map structure
234  * @param cds pointer to the CDSGroup structure
235  * @param cds_start index of the start record in the CDSGroup
236  * @param flags control flags
237  *
238  * @retval 1 if successful
239  * @retval 0 if an error occurred
240  */
242  CSVParser *csv,
243  int *csv_indexes,
244  int csv_count,
245  CSV2CDSMap *map,
246  CDSGroup *cds,
247  int cds_start,
248  int flags)
249 {
250  int dynamic_dod = dsproc_get_dynamic_dods_mode();
251  const char *csv_name;
252  const char *csv_units;
253  const char **csv_missings;
254  CSVStrMap *csv_str_map;
255  double (*csv_str_to_dbl)(const char *strval, int *status);
256  int (*csv_set_data)(
257  const char *csv_strval,
258  const char **csv_missings,
259  CDSVar *cds_var,
260  size_t cds_sample_size,
261  CDSData cds_missing,
262  CDSData cds_datap);
263  char **csv_strvals;
264  const char *cds_name;
265  const char *cds_units;
266  CDSVar *cds_var;
267  CDSDim *cds_dim;
268  CDSData cds_data;
269  void *cds_data_start;
270  CDSData cds_missing;
271  int cds_nmissing;
272  int skip_debug_msg;
273  int status;
274  size_t sample_size;
275  size_t type_size;
276  size_t nbytes;
277  int is_missing;
278  float mv;
279  int mi, mvi, ri, smi, csvi;
280 
281  CDSUnitConverter unit_converter;
282 
283  cds_units = (const char *)NULL;
284 
286  "Mapping input CSV data to output dataset variables\n"
287  " - input file: %s\n"
288  " - start index: %d\n"
289  " - num samples: %d\n"
290  " - output dataset: %s\n"
291  " - start index: %d\n",
292  csv->file_name, csv_indexes[0], csv_count, cds->name, cds_start);
293 
294  /* Loop over each entry in the variable map */
295 
296  for (mi = 0; map[mi].csv_name; ++mi) {
297 
298  cds_name = map[mi].cds_name;
299 
300  csv_name = map[mi].csv_name;
301  csv_units = map[mi].csv_units;
302  csv_missings = map[mi].csv_missings;
303 
304  csv_str_map = map[mi].str_map;
305  csv_str_to_dbl = map[mi].str_to_dbl;
306  csv_set_data = map[mi].set_data;
307 
308  skip_debug_msg = 0;
309 
310  /* Get the CSV field */
311 
312  csv_strvals = dsproc_get_csv_field_strvals(csv, csv_name);
313 
314  if (!csv_strvals) {
315 
317  "Required column '%s' not found in CSV file: %s\n",
318  csv_name, csv->file_name);
319 
321  return(0);
322  }
323 
324  /* Get the CDS variable */
325 
326  cds_var = cds_get_var(cds, cds_name);
327 
328  if (!cds_var && dynamic_dod) {
329 
330  /* Define time variable if necessary */
331 
332  cds_dim = cds_get_dim(cds, "time");
333  if (!cds_dim) {
334 
335  cds_dim = cds_define_dim(cds, "time", 0, 1);
336  if (!cds_dim) {
337 
339  "Could not create time dimension in dataset: %s\n",
340  cds_get_object_path(cds));
341 
343  return(0);
344  }
345 
346  cds_var = cds_define_var(cds,
347  "time", CDS_DOUBLE, 1, (const char **)&(cds_dim->name));
348 
349  if (!cds_var) {
350 
352  "Could not create 'time' variable in dataset: %s\n",
353  cds_get_object_path(cds));
354 
356  return(0);
357  }
358  }
359 
360 /* BDE TODO:
361 
362 Loop over CSV values to determine variable type (int, float, or char).
363 Autoset strlen_# dimension length for char type.
364 
365 for (ri = 0; ri < csv_count; ++ri) {
366 
367  csvi = csv_indexes[ri];
368  csv_strvals[csvi],
369 }
370 */
371 
372  /* Define the variable */
373 
374  cds_var = cds_define_var(cds,
375  cds_name, CDS_FLOAT, 1, (const char **)&(cds_dim->name));
376 
377  if (!cds_var) {
378 
380  "Could not create '%s' variable in dataset: %s\n",
381  cds_name, cds_get_object_path(cds));
382 
384  return(0);
385  }
386 
387  /* Define the units attribute */
388 
389  if (csv_units) {
390  if (!cds_define_att_text(cds_var, "units", "%s", csv_units)) {
392  return(0);
393  }
394  }
395 
396  /* Define the missing_value attribute */
397 
398  mv = -9999.0;
399 
400  if (!cds_define_att(cds_var, "missing_value", CDS_FLOAT, 1, &mv)) {
402  return(0);
403  }
404  }
405 
406  if (!cds_var) {
407 
409  "Required variable '%s' not found in dataset: %s\n",
410  cds_name, cds->name);
411 
413  return(0);
414  }
415 
416  /* Check if data already exists in the CDS variable */
417 
418  if (cds_var->sample_count > (size_t)cds_start) {
419 
420  if (flags & CSV_OVERWRITE) {
421 
423  " - * OVERWRITING EXISTING DATA * %s\t-> %s\n",
424  csv_name, cds_name);
425 
426  skip_debug_msg = 1;
427  }
428  else {
430  " - * NOT OVERWRITING EXISTING DATA * %s\t-> %s\n",
431  csv_name, cds_name);
432  continue;
433  }
434  }
435 
436  /* Check if we need to do a unit conversion */
437 
438  unit_converter = (CDSUnitConverter)NULL;
439 
440  if (csv_units) {
441 
442  cds_units = cds_get_var_units(cds_var);
443  if (cds_units) {
444 
445  status = cds_get_unit_converter(csv_units, cds_units, &unit_converter);
446  if (status < 0) {
447 
449  "Could not convert csv units '%s' to cds units '%s'\n",
450  csv_units, cds_units);
451 
453  return(0);
454  }
455  }
456  }
457 
458  /* Get the missing value to use for the CDS variable */
459 
460  cds_missing.vp = (void *)NULL;
461  cds_nmissing = cds_get_var_missing_values(cds_var, &cds_missing.vp);
462 
463  if (cds_nmissing < 0) {
464 
466  "Could not get missing value for variable: %s\n"
467  " -> memory allocation error",
468  cds_var->name);
469 
471 
472  return(0);
473  }
474 
475  if (cds_nmissing == 0) {
476 
477  if (csv_missings) {
478 
480  "Could not get missing value for variable: %s\n"
481  " -> missing_value attribute not defined",
482  cds_var->name);
483 
485 
486  return(0);
487  }
488 
489  cds_missing.vp = calloc(1, sizeof(double));
490  cds_get_default_fill_value(cds_var->type, cds_missing.vp);
491  }
492 
493  /* Map the data from the CSV field to the CDS variable */
494 
495  if (!skip_debug_msg) {
497  " - %s\t-> %s\n",
498  csv_name, cds_name);
499  }
500 
501  cds_data.vp = cds_alloc_var_data(cds_var, cds_start, csv_count);
502  if (!cds_data.vp) {
503 
505  "Memory allocation error mapping CSV dataset to CDS dataset\n");
506 
508  if (cds_missing.vp) free(cds_missing.vp);
509  return(0);
510  }
511 
512  cds_data_start = cds_data.vp;
513 
514  if (csv_set_data) {
515 
516  sample_size = cds_var_sample_size(cds_var);
517  type_size = cds_data_type_size(cds_var->type);
518  nbytes = sample_size * type_size;
519 
520  for (ri = 0; ri < csv_count; ++ri) {
521 
522  csvi = csv_indexes[ri];
523 
524  status = csv_set_data(
525  csv_strvals[csvi],
526  csv_missings,
527  cds_var,
528  sample_size,
529  cds_missing,
530  cds_data);
531 
532  if (status == 0) {
533  if (cds_missing.vp) free(cds_missing.vp);
534  return(0);
535  }
536 
537  cds_data.vp += nbytes;
538  }
539  }
540  else if (cds_var->type == CDS_CHAR) {
541 
542  sample_size = cds_var_sample_size(cds_var);
543 
544  for (ri = 0; ri < csv_count; ++ri) {
545 
546  memset(cds_data.cp, *cds_missing.cp, sample_size);
547 
548  csvi = csv_indexes[ri];
549  is_missing = 0;
550 
551  if (!csv_strvals[csvi] || *csv_strvals[csvi] == '\0') {
552  is_missing = 1;
553  }
554  else if (csv_missings) {
555 
556  for (mvi = 0; csv_missings[mvi]; ++mvi) {
557  if (strcmp(csv_strvals[csvi], csv_missings[mvi]) == 0) {
558  is_missing = 1;
559  break;
560  }
561  }
562  }
563 
564  if (!is_missing) {
565  strncpy(cds_data.cp, csv_strvals[csvi], sample_size);
566  }
567 
568  cds_data.cp += sample_size;
569  }
570  }
571  else {
572 
573  switch (cds_var->type) {
574  case CDS_BYTE: CSV_MAP_TO_CDS(signed char, cds_data.bp, cds_missing.bp, atoi); break;
575  case CDS_SHORT: CSV_MAP_TO_CDS(short, cds_data.sp, cds_missing.sp, atoi); break;
576  case CDS_INT: CSV_MAP_TO_CDS(int, cds_data.ip, cds_missing.ip, atoi); break;
577  case CDS_FLOAT: CSV_MAP_TO_CDS(float, cds_data.fp, cds_missing.fp, atof); break;
578  case CDS_DOUBLE: CSV_MAP_TO_CDS(double, cds_data.dp, cds_missing.dp, atof); break;
579  default:
580 
582  "Could not map CSV data to CDS variable: %s:%s\n"
583  " -> invalid CDSDataType: %d\n",
584  cds->name, cds_var->name, (int)cds_var->type);
585 
587  if (cds_missing.vp) free(cds_missing.vp);
588  return(0);
589  }
590  }
591 
592  /* Convert CSV units to CDS units */
593 
594  if (unit_converter) {
595 
597  " - converting units: '%s' to '%s'\n",
598  csv_units, cds_units);
599 
600  sample_size = cds_var_sample_size(cds_var);
601 
602  cds_convert_units(unit_converter,
603  cds_var->type, // in type
604  csv_count * sample_size, // length,
605  cds_data_start, // void * in_data,
606  cds_var->type, // CDSDataType out_type,
607  cds_data_start, // void * out_data,
608  cds_nmissing, // size_t nmap,
609  cds_missing.vp, // void * in_map,
610  cds_missing.vp, // void * out_map,
611  NULL, NULL, NULL, NULL);
612  }
613 
614  if (cds_missing.vp) free(cds_missing.vp);
615  }
616 
617  return(1);
618 }