libdsproc3  2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dsproc_csv_ingest_config.c
Go to the documentation of this file.
1 /*******************************************************************************
2 *
3 * COPYRIGHT (C) 2013 Battelle Memorial Institute. All Rights Reserved.
4 *
5 ********************************************************************************
6 *
7 * Authors:
8 * name: Brian Ermold
9 * phone: (509) 375-2277
10 * email: brian.ermold@pnl.gov
11 *
12 ********************************************************************************
13 *
14 * REPOSITORY INFORMATION:
15 * $Revision: 68036 $
16 * $Author: ermold $
17 * $Date: 2016-03-13 22:21:35 +0000 (Sun, 13 Mar 2016) $
18 * $State:$
19 *
20 ********************************************************************************
21 *
22 * NOTE: DOXYGEN is used to generate documentation for this file.
23 *
24 *******************************************************************************/
25 
26 /** @file dsproc_csv_ingest_config.c
27  * Functions for Reading CSV Ingest Configuration Files.
28  */
29 
30 #include "dsproc3.h"
31 
32 /*******************************************************************************
33  * Private Data and Functions
34  */
35 /** @privatesection */
36 
/**
 *  Keywords recognized in a CSV Ingest configuration file.
 *
 *  The list is NULL terminated.  The loader in this file compares the start
 *  of each line that begins with a letter against these entries, in order,
 *  and accepts an entry only when the character following it is whitespace,
 *  ':', '=', or the end of the line; this is why "HEADER_LINE" can not be
 *  mistaken for "HEADER_LINE_TAG" or "HEADER_LINE_NUMBER".
 */
const char *_ConfKeys[] = {

    /* input file selection */
    "FILE_NAME_PATTERNS",
    "FILE_TIME_PATTERNS",

    /* file layout */
    "DELIMITER",
    "HEADER_LINE",
    "HEADER_LINE_TAG",
    "HEADER_LINE_NUMBER",
    "NUMBER_OF_HEADER_LINES",
    "NUMBER_OF_COLUMNS",

    /* time handling and output mapping */
    "TIME_COLUMN_PATTERNS",
    "SPLIT_INTERVAL",
    "FIELD_MAP",

    /* end of list marker */
    NULL
};
55 
/**
 *  Get the time from a CSV Ingest configuration file name.
 *
 *  File names are expected to look like:
 *
 *      SSSnameF#.YYYYMMDD.hhmmss.csv_conf
 *
 *  or:
 *
 *      SSSnameF#.dl.YYYYMMDD.hhmmss.csv_conf
 *
 *  The date and time fields are located by walking backwards from the last
 *  '.' in the name over two more '.' separated fields, so anything in front
 *  of the YYYYMMDD field is ignored.
 *
 *  @param  file_name  the name of the file
 *
 *  @retval time  value returned by get_secs1970() for the parsed fields
 *                (seconds since 1970)
 *  @retval 0     if the name contains no '.', or if all six date/time
 *                fields could not be parsed
 */
static time_t _csv_get_conf_file_name_time(const char *file_name)
{
    int         YYYY  = 0;
    int         MM    = 0;
    int         DD    = 0;
    int         hh    = 0;
    int         mm    = 0;
    int         ss    = 0;
    int         count;
    const char *chrp;

    /* Start at the '.' in front of the file extension */

    chrp = strrchr(file_name, '.');
    if (!chrp) return(0);

    /* Step back over the hhmmss and YYYYMMDD fields.  Each pass stops on
     * the previous '.' or on the first character of the name. */

    for (count = 0; count < 2; ++count) {
        if (chrp != file_name) {
            for (--chrp; *chrp != '.' && chrp != file_name; --chrp);
        }
    }

    if (*chrp == '.') ++chrp;

    /* All six fields must be present, otherwise the name is malformed and
     * partially parsed values must not be converted to a time. */

    if (sscanf(chrp, "%4d%2d%2d.%2d%2d%2d",
        &YYYY, &MM, &DD, &hh, &mm, &ss) != 6) {

        return(0);
    }

    return(get_secs1970(YYYY, MM, DD, hh, mm, ss));
}
96 
97 /**
98  * Get the list of search paths for CSV Ingest configuration files.
99  *
100  * If an error occurs in this function it will be appended to the log and
101  * error mail messages, and the process status will be set appropriately.
102  *
103  * @param conf pointer to the CSVConf structure.
104  *
105  * @param flags control flags:
106  *
107  * - CSV_CHECK_DATA_CONF check for config files under the root directory
108  * defined by the CONF_DATA environment variable.
109  *
110  * @param npaths output: number of search paths.
111  * @param paths output: list of search paths.
112  *
113  * @retval 1 if successful
114  * @retval 0 if error occurred
115  */
116 static int _csv_get_conf_search_paths(CSVConf *conf, int flags, int *npaths, const char ***paths)
117 {
118  const char *proc_name = conf->proc;
119  const char *conf_data = (const char *)NULL;
120  const char *ingest_home;
121 
122  int search_npaths;
123  char **search_paths;
124  char path[PATH_MAX];
125  int pi;
126 
127  /* Check if the conf->file_path has already been set */
128 
129  if (conf->file_path) {
130 
131  *paths = &(conf->file_path);
132  *npaths = 1;
133 
134  return(1);
135  }
136 
137  /* Check if the search paths have already been set */
138 
139  if (conf->search_npaths) {
140 
141  *npaths = conf->search_npaths;
142  *paths = conf->search_paths;
143 
144  return(1);
145  }
146 
147  /* Create list of default search paths */
148 
149  ingest_home = getenv("INGEST_HOME");
150 
151  if (flags & CSV_CHECK_DATA_CONF) {
152 
153  conf_data = getenv("CONF_DATA");
154 
155  if (!conf_data && !ingest_home) {
156 
158  "Could not create configuration file search paths:\n"
159  " -> environment variables CONF_DATA and INGEST_HOME do not exist");
160 
162 
163  return(0);
164  }
165  }
166  else if (!ingest_home) {
167 
169  "Could not create configuration file search paths:\n"
170  " -> environment variable INGEST_HOME does not exist");
171 
173 
174  return(0);
175  }
176 
177  search_paths = (char **)calloc(2, sizeof(char *));
178  if (!search_paths) goto MEMORY_ERROR;
179 
180  search_npaths = 0;
181 
182  for (pi = 0; pi < 2; ++pi) {
183 
184  if (pi == 0) {
185  if (!conf_data) continue;
186  snprintf(path, PATH_MAX, "%s/%s", conf_data, proc_name);
187  }
188  else {
189  if (!ingest_home) continue;
190  snprintf(path, PATH_MAX, "%s/conf/ingest/%s", ingest_home, proc_name);
191  }
192 
193  search_paths[search_npaths] = strdup(path);
194  if (!search_paths[search_npaths]) goto MEMORY_ERROR;
195  search_npaths++;
196  }
197 
198  conf->search_npaths = search_npaths;
199  conf->search_paths = (const char **)search_paths;
200 
201  *npaths = conf->search_npaths;
202  *paths = (const char **)conf->search_paths;
203 
204  return(1);
205 
206 MEMORY_ERROR:
207 
208  if (search_paths) {
209 
210  for (pi = 0; pi < search_npaths; ++pi) {
211  free(search_paths[search_npaths]);
212  }
213 
214  free(search_paths);
215  }
216 
218  "Memory allocation error creating list of configuration file search paths\n");
219 
221 
222  return(0);
223 }
224 
225 /**
226  * Find a CSV Ingest configuration file.
227  *
228  * The first time this function is called the data_time argument must be
229  * set to 0. This will find the main conf file containing the file
230  * name patterns and all default configuration settings. It will also set
231  * the path to look for time varying conf files in subsequent calls to
232  * this function.
233  *
234  * If an error occurs in this function it will be appended to the log and
235  * error mail messages, and the process status will be set appropriately.
236  *
237  * @param conf Pointer to the CSVConf structure. The file name and
238  * path will be stored in the file_name and file_path
239  * structure members.
240  *
241  * @param data_time The start time of the data being processed, this should
242  * be determined from the file name. Specify 0 to find the
243  * main conf file containing the file name patterns
244  * and all default configuration settings.
245  *
246  * @param flags Control Flags:
247  *
248  * - CSV_CHECK_DATA_CONF check for config files under the root directory
249  * defined by the CONF_DATA environment variable.
250  *
251  * @param path output buffer for the file path,
252  *
253  * @param name output buffer for the file name
254  *
255  * @retval 1 if a file was found
256  * @retval 0 if a file was not found
257  * @retval -1 if error occurred
258  */
259 static int _csv_find_conf_file(CSVConf *conf, time_t data_time, int flags, char *path, char *name)
260 {
261  const char *site = conf->site;
262  const char *fac = conf->fac;
263  const char *base_name = conf->name;
264  const char *level = conf->level;
265 
266  int search_npaths;
267  const char **search_paths;
268 
269  char full_path[PATH_MAX];
270  char pattern[PATH_MAX];
271  const char *patternp;
272  int found_file;
273  int pi, ni;
274 
275  DirList *dirlist;
276  int nfiles;
277  char **file_list;
278  time_t file_time;
279  char ts1[32];
280  int fi;
281 
282  found_file = 0;
283 
284  if (data_time == 0) {
285 
286  /* Looking for main configuration file */
287 
288  if (!_csv_get_conf_search_paths(conf, flags, &search_npaths, &search_paths)) {
289  return(-1);
290  }
291 
292  /* Loop over possible configuration file directories */
293 
294  for (pi = 0; pi < search_npaths; ++pi) {
295 
296  strncpy(path, search_paths[pi], PATH_MAX);
297 
299  "Checking for main csv_conf file in: %s\n", path);
300 
301  if (access(path, F_OK) != 0) {
302 
303  if (errno != ENOENT) {
304 
306  "Could not access directory: %s\n"
307  " -> %s\n", path, strerror(errno));
308 
310 
311  return(-1);
312  }
313 
315  " - path does not exist\n");
316 
317  continue;
318  }
319 
320  /* Loop over possible names of configuration files */
321 
322  for (ni = 0; ni < 2; ++ni) {
323 
324  if (level) {
325  if (ni == 0) {
326  snprintf(name, PATH_MAX, "%s%s%s.%s.csv_conf", site, base_name, fac, level);
327  }
328  else {
329  snprintf(name, PATH_MAX, "%s.%s.csv_conf", base_name, level);
330  }
331  }
332  else {
333  if (ni == 0) {
334  snprintf(name, PATH_MAX, "%s%s%s.csv_conf", site, base_name, fac);
335  }
336  else {
337  snprintf(name, PATH_MAX, "%s.csv_conf", base_name);
338  }
339  }
340 
341  DSPROC_DEBUG_LV1(" - checking for file: %s\n", name);
342 
343  snprintf(full_path, PATH_MAX, "%s/%s", path, name);
344 
345  if (access(full_path, F_OK) == 0) {
346  DSPROC_DEBUG_LV1(" - found\n");
347  found_file = 1;
348  break;
349  }
350  else if (errno == ENOENT) {
351  DSPROC_DEBUG_LV1(" - not found\n");
352  }
353  else {
354 
356  "Could not access file: %s\n"
357  " -> %s\n", full_path, strerror(errno));
358 
360 
361  return(-1);
362  }
363 
364  } // end loop over file name search lists
365 
366  if (found_file) break;
367 
368  } // end loop over possible configuration file directories
369  }
370  else { // data_time > 0
371 
372  /* Looking for time specific configuration file names */
373 
374  nfiles = 0;
375  dirlist = (DirList *)NULL;
376 
377  if (conf->dirlist) {
378 
379  dirlist = conf->dirlist;
380  nfiles = dirlist_get_file_list(dirlist, &file_list);
381  if (nfiles < 0) {
382 
384  "Could not get list of configuration files in: %s\n",
385  path);
386 
388 
389  return(-1);
390  }
391 
392  strncpy(path, dirlist->path, PATH_MAX);
393  }
394  else {
395 
396  if (level) {
397  sprintf(pattern, "^%s%s%s\\.%s\\.[0-9]{8}\\.[0-9]{6}\\.csv_conf", site, base_name, fac, level);
398  }
399  else {
400  sprintf(pattern, "^%s%s%s\\.[0-9]{8}\\.[0-9]{6}\\.csv_conf", site, base_name, fac);
401  }
402 
403  patternp = &(pattern[0]);
404 
405  if (!_csv_get_conf_search_paths(conf, flags, &search_npaths, &search_paths)) {
406  return(-1);
407  }
408 
409  /* Loop over possible configuration file directories */
410 
411  for (pi = 0; pi < search_npaths; ++pi) {
412 
413  strncpy(path, search_paths[pi], PATH_MAX);
414 
416  "Checking for time varying csv_conf files in: %s\n", path);
417 
418  if (access(path, F_OK) != 0) {
419 
420  if (errno != ENOENT) {
421 
423  "Could not access directory: %s\n"
424  " -> %s\n", path, strerror(errno));
425 
427 
428  return(-1);
429  }
430 
432  " - path does not exist\n");
433 
434  continue;
435  }
436 
437  /* Check for time varying configuration files */
438 
439  if (dirlist) dirlist_free(dirlist);
440 
441  dirlist = dirlist_create(path, 0);
442  if (!dirlist ||
443  !dirlist_add_patterns(dirlist, 1, &patternp, 0)) {
444 
446  "Could not create configuration files list for: %s\n",
447  path);
448 
450 
451  if (dirlist) dirlist_free(dirlist);
452  return(-1);
453  }
454 
455  nfiles = dirlist_get_file_list(dirlist, &file_list);
456  if (nfiles < 0) {
457 
459  "Could not get configuration files list for: %s\n",
460  path);
461 
463 
464  dirlist_free(dirlist);
465  return(-1);
466  }
467  else if (nfiles == 0) {
468  DSPROC_DEBUG_LV1(" - none found\n");
469  }
470  else {
471  DSPROC_DEBUG_LV1(" - found\n");
472  break;
473  }
474 
475  } /* end loop over possible configuration file directories */
476 
477  conf->dirlist = dirlist;
478 
479  } // end if !conf->dirlist
480 
481  /* Now look for the file for the specified data time */
482 
483  if (nfiles > 0) {
484 
486  "Looking for csv_conf file for data time: %s\n",
487  format_secs1970(data_time, ts1));
488 
489  for (fi = nfiles - 1; fi > -1; --fi) {
490  file_time = _csv_get_conf_file_name_time(file_list[fi]);
491  if (data_time >= file_time) break;
492  }
493 
494  if (fi < 0) {
495  DSPROC_DEBUG_LV1(" - not found\n");
496  }
497  else {
498  found_file = 1;
499  strncpy(name, file_list[fi], PATH_MAX);
500  DSPROC_DEBUG_LV1(" - found: %s\n", name);
501  }
502  }
503 
504  } // end if data_time > 0
505 
506  return(found_file);
507 }
508 
/**
 *  Split a string on the next delimiter.
 *
 *  The delimiter is located with dsproc_find_csv_delim().  The input string
 *  is terminated at the delimiter, and whitespace immediately in front of
 *  the delimiter is overwritten with '\0' characters (the first character
 *  of the input string is never examined or modified by the trim).
 *
 *  The returned pointer is the result of dsproc_skip_csv_whitespace()
 *  applied to the character following the delimiter.  It points into the
 *  input string, so it is only valid as long as the memory used by the
 *  input string is not altered or freed.
 *
 *  @param  strp   pointer to the delimited string (modified in place)
 *  @param  delim  delimiter character
 *
 *  @retval strp  pointer to the beginning of the next string
 *  @retval NULL  if the delimiter was not found
 */
static char *_csv_split_delim(char *strp, char delim)
{
    char *delimp;
    char *endp;

    delimp = dsproc_find_csv_delim(strp, delim);
    if (!delimp) return((char *)NULL);

    /* Trim trailing white-space from the previous substring.
     *
     * endp always points one past the character being tested so that no
     * pointer in front of strp is ever formed, and the character is cast
     * to unsigned char because isspace() is undefined for negative values. */

    endp = delimp;
    while ((endp - strp) > 1 && isspace((unsigned char)endp[-1])) {
        *--endp = '\0';
    }

    /* Terminate field */

    *delimp = '\0';

    /* Skip leading white-space in the next substring */

    return(dsproc_skip_csv_whitespace(delimp + 1, delim));
}
548 
/**
 *  Strip comments from an in memory copy of a conf file.
 *
 *  A comment starts at a '#' character and runs up to, but not including,
 *  the next newline or the end of the data.  The data is compacted in place
 *  and is always left null terminated.
 *
 *  A '#' inside a single or double quoted string is not treated as a
 *  comment.  Inside a quoted string two consecutive quote characters are
 *  copied as is and do not end the string.  A quoted string that is never
 *  closed extends to the end of the data.
 *
 *  @param  file_data  pointer to start of the null terminated config file
 *                     data (modified in place)
 */
static void _csv_strip_comments(char *file_data)
{
    const char *src = file_data;
    char       *dst = file_data;
    char        quote;

    while (*src != '\0') {

        if (*src == '"' || *src == '\'') {

            /* quoted string: copy everything up to the closing quote */

            quote  = *src;
            *dst++ = *src++;

            while (*src != '\0') {

                if (*src == quote) {

                    if (src[1] != quote) break;

                    /* doubled quote: copy the first one here, the second
                     * one is copied by the statement below */

                    *dst++ = *src++;
                }

                *dst++ = *src++;
            }
        }
        else if (*src == '#') {

            /* comment: skip to the end of the line */

            do { ++src; } while (*src != '\n' && *src != '\0');
        }

        /* A comment or an unterminated quoted string can leave src on the
         * terminator.  Stop here so it is never stepped over. */

        if (*src == '\0') break;

        /* Copy the current character: a regular character, the closing
         * quote, or the newline that ended a comment */

        *dst++ = *src++;
    }

    *dst = '\0';
}
597 
/**
 *  Trim end of line whitespace.
 *
 *  Every trailing character for which isspace() is true is overwritten
 *  with a null character.  An empty string is left untouched.
 *
 *  @param  linep  pointer to the null terminated line to trim
 *                 (modified in place)
 */
static void _csv_trim_eol(char *linep)
{
    size_t length = strlen(linep);

    /* Work with a length instead of a pointer so that an empty line never
     * produces a pointer in front of the buffer, and cast to unsigned char
     * because isspace() is undefined for negative values. */

    while (length > 0 && isspace((unsigned char)linep[length - 1])) {
        linep[--length] = '\0';
    }
}
608 
/**
 *  Trim beginning and ending quotes from a string.
 *
 *  The quotes are only removed when the string is at least two characters
 *  long, starts with a single or double quote, and ends with the same
 *  quote character.  The closing quote is overwritten with a null
 *  character and the returned pointer skips the opening quote.
 *
 *  @param  linep  pointer to the line to trim (modified in place)
 *
 *  @retval linep  pointer to the beginning of the trimmed line
 */
static char *_csv_trim_quotes(char *linep)
{
    const size_t nchars = strlen(linep);
    char         first;

    /* Too short to hold a pair of quotes */

    if (nchars < 2) return linep;

    /* Not quoted, or the quotes do not match */

    first = linep[0];

    if (first != '"' && first != '\'') return linep;
    if (linep[nchars - 1] != first)    return linep;

    /* Drop the closing quote and step over the opening one */

    linep[nchars - 1] = '\0';

    return linep + 1;
}
632 
633 /**
634  * Load a CSV Configuration file into a CSVConf structure.
635  *
636  * If an error occurs in this function it will be appended to the log and
637  * error mail messages, and the process status will be set appropriately.
638  *
639  * @param conf pointer to CSVConf structure to populate
640  * @param path path to the csv configuration file
641  * @param name name of the csv configuration file
642  * @param flags reserved for control flags
643  *
644  * @retval 1 if successful
645  * @retval 0 if an error occurred
646  */
647 static int _csv_load_conf_file(
648  CSVConf *conf,
649  const char *path,
650  const char *name,
651  int flags)
652 {
653  char full_path[PATH_MAX];
654  struct stat file_stats;
655  size_t nbytes;
656  size_t nread;
657  FILE *fp;
658  char *file_data;
659  char *linep;
660  int linenum;
661  char *eol;
662  char chr;
663 
664  const char *key;
665  size_t keylen;
666  size_t linelen;
667 
668  int count;
669  int buflen;
670  char **buffer;
671 
672  char *tc_name;
673  char *out_name;
674  int reload;
675  int ki;
676 
677  flags = flags; // prevent "unused parameter" compiler warning
678 
679  DSPROC_DEBUG_LV1("Reading Configuration File: %s/%s\n", path, name);
680 
681  /* Set the full path to the conf file */
682 
683  snprintf(full_path, PATH_MAX, "%s/%s", path, name);
684 
685  /* Get the file status */
686 
687  if (stat(full_path, &file_stats) < 0) {
688 
690  "Could not get file stats for conf file: %s\n"
691  " -> %s\n", full_path, strerror(errno));
692 
694 
695  return(0);
696  }
697 
698  /* Read in the entire file */
699 
700  nbytes = file_stats.st_size;
701  if (nbytes == 0) return(1);
702 
703  file_data = (char *)malloc((nbytes + 1) * sizeof(char));
704  if (!file_data) {
705 
707  "Memory allocation error loading conf file: %s\n",
708  full_path);
709 
711 
712  return(0);
713  }
714 
715  fp = fopen(full_path, "r");
716  if (!fp) {
717 
719  "Could not open file: %s\n"
720  " -> %s\n", full_path, strerror(errno));
721 
723 
724  free(file_data);
725  return(0);
726  }
727 
728  nread = fread(file_data, 1, nbytes, fp);
729  fclose(fp);
730 
731  if (nread != nbytes) {
732 
734  "Could not read conf file: %s\n"
735  " -> %s\n", full_path, strerror(errno));
736 
738 
739  free(file_data);
740  return(0);
741  }
742 
743  file_data[nbytes] = '\0';
744 
745  /* Remove comments */
746 
747  _csv_strip_comments(file_data);
748 
749  /* Allocate memory for the buffer */
750 
751  buflen = 64;
752  buffer = calloc(buflen, sizeof(char *));
753  if (!buffer) goto MEMORY_ERROR;
754 
755  /* Loop over lines from the conf file */
756 
757  key = (char *)NULL;
758  linenum = 0;
759  reload = 0;
760 
761  for (linep = file_data; *linep != '\0'; linep = eol + 1) {
762 
763  linenum += 1;
764 
765  /* Find end-of-line */
766 
767  eol = dsproc_find_csv_delim(linep, '\n');
768  if (eol) {
769 
770  /* Skip blank lines */
771 
772  if (eol == linep) {
773  continue;
774  }
775 
776  /* Handle carriage returns */
777 
778  if (*(eol - 1) == '\r') {
779  *(eol - 1) = '\0';
780  }
781 
782  /* Null terminate the line */
783 
784  *eol = '\0';
785  }
786 
787  /* Trim end-of-line whitespace */
788 
789  _csv_trim_eol(linep);
790 
791  /* Skip blank lines */
792 
793  if (*linep == '\0') {
794  if (eol) continue;
795  else break;
796  }
797 
798  /* Check if this is a key word. */
799  if (isalpha(*linep)) {
800 
801  linelen = strlen(linep);
802 
803  for (ki = 0; _ConfKeys[ki]; ++ki) {
804 
805  keylen = strlen(_ConfKeys[ki]);
806  if (keylen > linelen) continue;
807 
808  chr = linep[keylen];
809  if (isspace(chr) || chr == ':' || chr == '=' || chr == '\0') {
810  if (strncmp(linep, _ConfKeys[ki], keylen) == 0) {
811  break;
812  }
813  }
814  }
815 
816  if (_ConfKeys[ki]) {
817 
818  key = _ConfKeys[ki];
819  linep += keylen;
820  reload = 1;
821 
822  /* skip whitespace, colons, and = signs */
823 
824  while (isspace(*linep) || *linep == ':' || *linep == '=') ++linep;
825  }
826  else {
827 
829  "Invalid keyword found on line %d in file: %s\n"
830  " -> '%s'\n",
831  linenum, full_path, linep);
832 
834 
835  free(file_data);
836  return(0);
837  }
838  }
839  else {
840 
841  /* skip whitespace */
842 
843  while (isspace(*linep)) ++linep;
844  }
845 
846  /* Skip blank lines */
847 
848  if (*linep == '\0') {
849  if (eol) continue;
850  else break;
851  }
852 
853  /* Make sure we have found a keyword */
854 
855  if (!key) {
856 
858  "Invalid configuration file: %s\n"
859  " -> keyword not found before first line of text\n",
860  full_path);
861 
863 
864  free(file_data);
865  return(0);
866  }
867 
868  /* Set the configuration value */
869 
870  if (strcmp(key, "FILE_NAME_PATTERNS") == 0) {
871 
872  if (reload) dsproc_clear_csv_file_name_patterns(conf);
873 
874  count = dsproc_count_csv_delims(linep, ',') + 1;
875 
876  if (buflen < count) {
877  buffer = realloc(buffer, count * sizeof(char *));
878  if (!buffer) goto MEMORY_ERROR;
879  buflen = count;
880  }
881 
882  count = dsproc_split_csv_string(linep, ',', buflen, buffer);
883 
884  if (!dsproc_add_csv_file_name_patterns(conf, count, (const char **)buffer)) {
885  free(file_data);
886  return(0);
887  }
888  }
889  else if (strcmp(key, "FILE_TIME_PATTERNS") == 0) {
890 
891  if (reload) dsproc_clear_csv_file_time_patterns(conf);
892 
893  count = dsproc_count_csv_delims(linep, ',') + 1;
894  if (buflen < count) {
895  buffer = realloc(buffer, count * sizeof(char *));
896  if (!buffer) goto MEMORY_ERROR;
897  buflen = count;
898  }
899 
900  count = dsproc_split_csv_string(linep, ',', buflen, buffer);
901 
902  if (!dsproc_add_csv_file_time_patterns(conf, count, (const char **)buffer)) {
903  free(file_data);
904  return(0);
905  }
906  }
907  else if (strcmp(key, "DELIMITER") == 0) {
908 
909  linep = _csv_trim_quotes(linep);
910  if (*linep == '\\' && *(linep+1) == 't') {
911  conf->delim = '\t';
912  }
913  else {
914  conf->delim = *linep;
915  }
916  }
917  else if (strcmp(key, "HEADER_LINE") == 0) {
918 
919  if (reload) {
920  if (conf->header_line) {
921  free((void *)conf->header_line);
922  conf->header_line = (char *)NULL;
923  }
924  }
925 
926  if (!dsproc_append_csv_header_line(conf, linep)) {
927  free(file_data);
928  return(0);
929  }
930  }
931  else if (strcmp(key, "HEADER_LINE_TAG") == 0) {
932 
933  linep = _csv_trim_quotes(linep);
934 
935  if (conf->header_tag) free((void *)conf->header_tag);
936  conf->header_tag = strdup(linep);
937  if (!conf->header_tag) goto MEMORY_ERROR;
938  }
939  else if (strcmp(key, "HEADER_LINE_NUMBER") == 0) {
940 
941  linep = _csv_trim_quotes(linep);
942  conf->header_linenum = atoi(linep);
943  }
944  else if (strcmp(key, "NUMBER_OF_HEADER_LINES") == 0) {
945 
946  linep = _csv_trim_quotes(linep);
947  conf->header_nlines = atoi(linep);
948  }
949  else if (strcmp(key, "NUMBER_OF_COLUMNS") == 0) {
950 
951  linep = _csv_trim_quotes(linep);
952  conf->exp_ncols = atoi(linep);
953  }
954  else if (strcmp(key, "TIME_COLUMN_PATTERNS") == 0) {
955 
956  if (reload) dsproc_clear_csv_time_column_patterns(conf);
957 
958  tc_name = linep;
959  linep = _csv_split_delim(linep, ':');
960 
961  if (!linep || *linep == '\0') {
962 
964  "Invalid time column format found on line %d in file: %s\n"
965  " -> expected format: name: pattern(s)\n",
966  linenum, full_path);
967 
969 
970  free(file_data);
971  return(0);
972  }
973 
974  count = dsproc_count_csv_delims(linep, ',') + 1;
975 
976  if (buflen < count) {
977  buffer = realloc(buffer, count * sizeof(char *));
978  if (!buffer) goto MEMORY_ERROR;
979  buflen = count;
980  }
981 
982  count = dsproc_split_csv_string(linep, ',', buflen, buffer);
983 
984  if (!dsproc_add_csv_time_column_patterns(conf, tc_name, count, (const char **)buffer)) {
985  free(file_data);
986  return(0);
987  }
988  }
989  else if (strcmp(key, "SPLIT_INTERVAL") == 0) {
990 
991  linep = _csv_trim_quotes(linep);
992 
993  if (conf->split_interval) free((void *)conf->split_interval);
994  conf->split_interval = strdup(linep);
995  if (!conf->split_interval) goto MEMORY_ERROR;
996  }
997  else if (strcmp(key, "FIELD_MAP") == 0) {
998 
999  if (reload) dsproc_clear_csv_field_maps(conf);
1000 
1001  out_name = linep;
1002  linep = _csv_split_delim(linep, ':');
1003 
1004  if (!linep || *linep == '\0') {
1005 
1007  "Invalid field map format found on line %d in file: %s\n"
1008  " -> expected format: dod_var_name: csv column name [, csv units [, csv missing value string]]\n",
1009  linenum, full_path);
1010 
1012 
1013  free(file_data);
1014  return(0);
1015  }
1016 
1017  count = dsproc_count_csv_delims(linep, ',') + 1;
1018  if (buflen < count) {
1019  buffer = realloc(buffer, count * sizeof(char *));
1020  if (!buffer) goto MEMORY_ERROR;
1021  buflen = count;
1022  }
1023 
1024  count = dsproc_split_csv_string(linep, ',', buflen, buffer);
1025 
1027  conf, out_name, buffer[0], count - 1, (const char **)(buffer + 1))) {
1028 
1029  free(file_data);
1030  return(0);
1031  }
1032  }
1033 
1034  reload = 0;
1035 
1036  if (!eol) break;
1037 
1038  } /* end loop reading file */
1039 
1040  free(file_data);
1041  if (buffer) free(buffer);
1042 
1043  return(1);
1044 
1045 MEMORY_ERROR:
1046 
1047  if (buffer) free(buffer);
1048 
1050  "Memory allocation error loading CSV configuration file: %s/%s\n",
1051  path, name);
1052 
1054 
1055  return(0);
1056 }
1057 
1058 /*******************************************************************************
1059  * Public Functions
1060  */
1061 /** @publicsection */
1062 
1063 /**
1064  * Add an entry to the field map
1065  *
1066  * If an error occurs in this function it will be appended to the log and
1067  * error mail messages, and the process status will be set appropriately.
1068  *
1069  * @param conf pointer to CSVConf structure to populate
1070  * @param out_name name of the output variable, or NULL to use the column name
1071  * @param col_name name of the CSV column
1072  * @param nargs length of args list
1073  * @param args list of additional arguments in the following order
1074  * (specify NULL or empty string for no value):
1075  * - units
1076  * - comma separated list of missing value strings
1077  *
1078  * @retval 1 if successful
1079  * @retval 0 if an error occurred
1080  */
1082  CSVConf *conf,
1083  const char *out_name,
1084  const char *col_name,
1085  int nargs,
1086  const char **args)
1087 {
1088  CSVFieldMap *map;
1089  int length;
1090  CSVFieldMap *list;
1091  const char *namep;
1092  int count;
1093  int i;
1094 
1095  map = (CSVFieldMap *)NULL;
1096 
1097  /* Check if we already have an entry for this output name */
1098 
1099  if (out_name && *out_name != '\0') {
1100  for (i = 0; i < conf->field_nmaps; ++i) {
1101 
1102  map = &(conf->field_maps[i]);
1103  if (strcmp(map->out_name, out_name) == 0) {
1104  break;
1105  }
1106  }
1107 
1108  if (i == conf->field_nmaps) {
1109  map = (CSVFieldMap *)NULL;
1110  }
1111  else {
1112  if (map->col_name) free((void *)map->col_name);
1113  if (map->units) free((void *)map->units);
1114  if (map->missings) free(map->missings);
1115  if (map->missbuf) free(map->missbuf);
1116 
1117  namep = map->out_name;
1118  memset(map, 0, sizeof(CSVFieldMap));
1119  map->out_name = namep;
1120  }
1121  }
1122 
1123  /* Add a new field map entry if an existing one was not found */
1124 
1125  if (!map) {
1126  length = conf->field_nmaps + 1;
1127  list = (CSVFieldMap *)realloc(
1128  conf->field_maps, length * sizeof(CSVFieldMap));
1129 
1130  if (!list) goto MEMORY_ERROR;
1131 
1132  conf->field_maps = list;
1133 
1134  map = &(conf->field_maps[conf->field_nmaps]);
1135  memset(map, 0, sizeof(CSVFieldMap));
1136 
1137  if (out_name && *out_name != '\0') {
1138  map->out_name = strdup(out_name);
1139  if (!map->out_name) goto MEMORY_ERROR;
1140  }
1141 
1142  conf->field_nmaps += 1;
1143  }
1144 
1145  /* Set CSV column name */
1146 
1147  map->col_name = strdup(col_name);
1148 
1149  /* Set CSV units */
1150 
1151  if (nargs >= 1) {
1152  if (args[0] && *args[0] != '\0') {
1153  map->units = strdup(args[0]);
1154  }
1155  }
1156 
1157  /* Set CSV missing value strings */
1158 
1159  if (nargs >= 2) {
1160 
1161  if (args[1] && *args[1] != '\0') {
1162 
1163  map->missbuf = strdup(args[1]);
1164 
1165  count = dsproc_count_csv_delims(map->missbuf, ',') + 1;
1166 
1167  map->missings = calloc(count, sizeof(char *));
1168  if (!map->missings) goto MEMORY_ERROR;
1169 
1171  map->missbuf, ',', count, (char **)map->missings);
1172  }
1173  }
1174 
1175  return(1);
1176 
1177 MEMORY_ERROR:
1178 
1180  "Memory allocation error appending an entry to the CSV field map\n");
1181 
1183 
1184  return(0);
1185 }
1186 
1187 /**
1188  * Add file name patterns to a CSVConf structure
1189  *
1190  * If an error occurs in this function it will be appended to the log and
1191  * error mail messages, and the process status will be set appropriately.
1192  *
1193  * @param conf pointer to CSVConf structure to populate
1194  * @param npatterns number of patterns
1195  * @param patterns list of extended regex patterns, see man regex(7)
1196  *
1197  * @retval 1 if successful
1198  * @retval 0 if an error occurred
1199  */
1201  CSVConf *conf,
1202  int npatterns,
1203  const char **patterns)
1204 {
1205  int length;
1206  const char **list;
1207  int i, j;
1208 
1209  length = conf->fn_npatterns + npatterns;
1210  list = (const char **)realloc(
1211  conf->fn_patterns, length * sizeof(const char *));
1212 
1213  if (!list) goto MEMORY_ERROR;
1214 
1215  conf->fn_patterns = list;
1216 
1217  for (i = conf->fn_npatterns, j = 0; j < npatterns; ++i, ++j) {
1218 
1219  if (patterns[j] && *patterns[j] != '\0') {
1220  conf->fn_patterns[i] = strdup(patterns[j]);
1221  if (!conf->fn_patterns[i]) goto MEMORY_ERROR;
1222  conf->fn_npatterns += 1;
1223  }
1224  }
1225 
1226  return(1);
1227 
1228 MEMORY_ERROR:
1229 
1231  "Memory allocation error adding CSV file name patterns to configuration structure\n");
1232 
1234 
1235  return(0);
1236 }
1237 
1238 /**
1239  * Add file time patterns to a CSVConf structure
1240  *
1241  * If an error occurs in this function it will be appended to the log and
1242  * error mail messages, and the process status will be set appropriately.
1243  *
1244  * @param conf pointer to CSVConf structure to populate
1245  * @param npatterns number of patterns
1246  * @param patterns list of extended regex patterns, see man regex(7)
1247  *
1248  * @retval 1 if successful
1249  * @retval 0 if an error occurred
1250  */
1252  CSVConf *conf,
1253  int npatterns,
1254  const char **patterns)
1255 {
1256  int length;
1257  const char **list;
1258  int i, j;
1259 
1260  /* Add the new patterns to the list */
1261 
1262  length = conf->ft_npatterns + npatterns;
1263  list = (const char **)realloc(
1264  conf->ft_patterns, length * sizeof(const char *));
1265 
1266  if (!list) goto MEMORY_ERROR;
1267 
1268  conf->ft_patterns = list;
1269 
1270  for (i = conf->ft_npatterns, j = 0; j < npatterns; ++i, ++j) {
1271 
1272  if (patterns[j] && *patterns[j] != '\0') {
1273  conf->ft_patterns[i] = strdup(patterns[j]);
1274  if (!conf->ft_patterns[i]) goto MEMORY_ERROR;
1275  conf->ft_npatterns += 1;
1276  }
1277  }
1278 
1279  return(1);
1280 
1281 MEMORY_ERROR:
1282 
1284  "Memory allocation error adding CSV file time patterns to configuration structure\n");
1285 
1287 
1288  return(0);
1289 }
1290 
1291 /**
1292  * Add time column patterns to a CSVConf structure
1293  *
      * The entry in conf->time_cols whose name equals `name` is reused when one
      * exists; otherwise a new zero-filled entry holding a copy of `name` is
      * appended to the list. Copies of the non-NULL, non-empty strings in
      * `patterns` are then added to that entry's pattern list.
      *
1294  * If an error occurs in this function it will be appended to the log and
1295  * error mail messages, and the process status will be set appropriately.
1296  *
1297  * @param conf pointer to CSVConf structure to populate
1298  * @param name name of the time column
1299  * @param npatterns number of patterns
1300  * @param patterns list of time patterns
1301  *
1302  * @retval 1 if successful
1303  * @retval 0 if an error occurred
1304  */
1306  CSVConf *conf,
1307  const char *name,
1308  int npatterns,
1309  const char **patterns)
1310 {
1311  CSVTimeCol *time_col;
1312  int length;
1313  CSVTimeCol *list;
1314  const char **strlist;
1315  int i, j;
1316
1317  time_col = (CSVTimeCol *)NULL;
1318
1319  /* Check if we already have an entry for this time column */
1320
1321  for (i = 0; i < conf->time_ncols; ++i) {
1322
1323  time_col = &(conf->time_cols[i]);
1324
1325  if (strcmp(time_col->name, name) == 0) {
1326  break;
1327  }
1328  }
1329
      /* i only reaches time_ncols when no existing entry matched */
1330  if (i == conf->time_ncols) {
1331
1332  /* Add a new time column */
1333
1334  length = conf->time_ncols + 1;
1335  list = (CSVTimeCol *)realloc(
1336  conf->time_cols, length * sizeof(CSVTimeCol));
1337
1338  if (!list) goto MEMORY_ERROR;
1339
1340  conf->time_cols = list;
1341
      /* realloc does not clear the new slot, so zero it before use */
1342  time_col = &(conf->time_cols[conf->time_ncols]);
1343  memset(time_col, 0, sizeof(CSVTimeCol));
1344
1345  time_col->name = strdup(name);
1346  if (!time_col->name) goto MEMORY_ERROR;
1347
      /* the entry is only counted once its name has been stored */
1348  conf->time_ncols += 1;
1349  }
1350
1351  /* Add time string patterns: grow the list by npatterns slots, then copy
      * in every non-NULL, non-empty pattern.
      *
      * NOTE(review): the destination index i advances on every iteration,
      * but time_col->npatterns only counts the copies that were made. A
      * skipped (NULL or empty) pattern followed by a valid one therefore
      * appears to leave an unset slot inside the counted range -- confirm. */
1352
1353  length = time_col->npatterns + npatterns;
1354  strlist = (const char **)realloc(
1355  time_col->patterns, length * sizeof(const char *));
1356
1357  if (!strlist) goto MEMORY_ERROR;
1358
1359  time_col->patterns = strlist;
1360
1361  for (i = time_col->npatterns, j = 0; j < npatterns; ++i, ++j) {
1362
1363  if (patterns[j] && *patterns[j] != '\0') {
1364  time_col->patterns[i] = strdup(patterns[j]);
1365  if (!time_col->patterns[i]) goto MEMORY_ERROR;
1366  time_col->npatterns += 1;
1367  }
1368  }
1369
1370  return(1);
1371
1372 MEMORY_ERROR:
1373
1375  "Memory allocation error adding CSV time column patterns to configuration structure\n");
1376
1378
1379  return(0);
1380 }
1381 
1382 /**
1383  * Append a string to the end of the header line
1384  *
      * When conf->header_line is already set, the buffer is grown with realloc
      * and `string` is concatenated as is (no separator is inserted).
      * Otherwise conf->header_line becomes a copy of `string`.
      *
1385  * If an error occurs in this function it will be appended to the log and
1386  * error mail messages, and the process status will be set appropriately.
1387  *
1388  * @param conf pointer to CSVConf structure to populate
1389  * @param string string to append to the header line
1390  *
1391  * @retval 1 if successful
1392  * @retval 0 if an error occurred
1393  */
1395  CSVConf *conf,
1396  const char *string)
1397 {
1398  int length;
1399  char *header;
1400
1401  if (conf->header_line) {
1402
      /* room for both strings plus the terminating null character */
1403  length = strlen(conf->header_line) + strlen(string) + 1;
1404  header = (char *)realloc(
1405  (void *)conf->header_line, length * sizeof(char));
1406
1407  if (!header) goto MEMORY_ERROR;
1408
      /* store the new pointer before appending; the old one may have moved */
1409  conf->header_line = header;
1410
1411  strcat(header, string);
1412  }
1413  else {
      /* no header line yet: start one from a copy of the string */
1414  conf->header_line = strdup(string);
1415  if (!conf->header_line) goto MEMORY_ERROR;
1416  }
1417
1418  return(1);
1419
1420 MEMORY_ERROR:
1421
1423  "Memory allocation error appending a string to the CSV header line\n");
1424
1426
1427  return(0);
1428 }
1429 
1430 /**
1431  * Clear the field maps in a CSVConf structure
1432  *
      * Frees the out_name, col_name, units, missings and missbuf members of
      * every entry in conf->field_maps, frees the array itself, and resets
      * conf->field_nmaps and conf->field_maps to 0 and NULL.
      *
      * NOTE(review): the individual missings[] strings are not freed here;
      * presumably they point into missbuf -- confirm.
      *
1433  * @param conf pointer to the CSVConf structure to clear
1434  */
1436 {
1437  CSVFieldMap *map;
1438  int i;
1439
1440  if (conf->field_maps) {
1441
1442  for (i = 0; i < conf->field_nmaps; ++i) {
1443
1444  map = &(conf->field_maps[i]);
1445
1446  if (map->out_name) free((void *)map->out_name);
1447  if (map->col_name) free((void *)map->col_name);
1448  if (map->units) free((void *)map->units);
1449  if (map->missings) free((void *)map->missings);
1450  if (map->missbuf) free(map->missbuf);
1451  }
1452
1453  free(conf->field_maps);
1454  }
1455
      /* leave the structure in the same state as one with no field maps */
1456  conf->field_nmaps = 0;
1457  conf->field_maps = (CSVFieldMap *)NULL;
1458 }
1459 
1460 /**
1461  * Clear the file name patterns in a CSVConf structure
1462  *
      * Frees every string in conf->fn_patterns and the list itself, then resets
      * conf->fn_npatterns and conf->fn_patterns to 0 and NULL.
      *
1463  * @param conf pointer to the CSVConf structure to clear
1464  */
1466 {
1467  int i;
1468
1469  if (conf->fn_patterns) {
1470
1471  for (i = 0; i < conf->fn_npatterns; ++i) {
1472  if (conf->fn_patterns[i]) free((void *)conf->fn_patterns[i]);
1473  }
1474
1475  free((void *)conf->fn_patterns);
1476  }
1477
      /* reset the count and pointer so the list reads as empty */
1478  conf->fn_npatterns = 0;
1479  conf->fn_patterns = (const char **)NULL;
1480 }
1481 
1482 /**
1483  * Clear the file time patterns in a CSVConf structure
1484  *
      * Frees every string in conf->ft_patterns and the list itself, then resets
      * conf->ft_npatterns and conf->ft_patterns to 0 and NULL.
      *
1485  * @param conf pointer to the CSVConf structure to clear
1486  */
1488 {
1489  int i;
1490
1491  if (conf->ft_patterns) {
1492
1493  for (i = 0; i < conf->ft_npatterns; ++i) {
1494  if (conf->ft_patterns[i]) free((void *)conf->ft_patterns[i]);
1495  }
1496
1497  free((void *)conf->ft_patterns);
1498  }
1499
      /* reset the count and pointer so the list reads as empty */
1500  conf->ft_npatterns = 0;
1501  conf->ft_patterns = (const char **)NULL;
1502 }
1503 
1504 /**
1505  * Clear the time column patterns in a CSVConf structure
1506  *
      * For every entry in conf->time_cols this frees the column name, each
      * pattern string and the pattern list. The entry array is then freed and
      * conf->time_ncols and conf->time_cols are reset to 0 and NULL.
      *
1507  * @param conf pointer to the CSVConf structure to clear
1508  */
1510 {
1511  CSVTimeCol *time_col;
1512  int i, j;
1513
1514  if (conf->time_cols) {
1515
1516  for (i = 0; i < conf->time_ncols; ++i) {
1517
1518  time_col = &(conf->time_cols[i]);
1519
1520  if (time_col->name) free((void *)time_col->name);
1521
1522  if (time_col->patterns) {
1523
      /* free the pattern strings before the list that holds them */
1524  for (j = 0; j < time_col->npatterns; ++j) {
1525  if (time_col->patterns[j]) free((void *)time_col->patterns[j]);
1526  }
1527
1528  free(time_col->patterns);
1529  }
1530  }
1531
1532  free(conf->time_cols);
1533  }
1534
      /* reset the count and pointer so the list reads as empty */
1535  conf->time_ncols = 0;
1536  conf->time_cols = (CSVTimeCol *)NULL;
1537 }
1538 
1539 /**
1540  * Configure a CSVParser using the settings in a CSVConf structure.
1541  *
      * Sets the parser delimiter when conf->delim is non-zero, then hands the
      * file time patterns and the patterns of each time column to the parser.
      * Processing stops at the first of those steps that reports a
      * status <= 0. The result of setting the delimiter is not checked.
      *
1542  * @param conf pointer to CSVConf structure
1543  * @param csv pointer to CSVParser
1544  *
1545  * @retval 1 if successful
1546  * @retval 0 if an error occurred
1547  */
1549 {
1550  CSVTimeCol *tc;
1551  int status;
1552  int i;
1553
      /* a zero delimiter means none was configured; keep the parser's own */
1554  if (conf->delim) {
1555  dsproc_set_csv_delimiter(csv, conf->delim);
1556  }
1557
      /* file time patterns are only passed on when at least one exists */
1558  if (conf->ft_npatterns) {
1559
1561  csv, conf->ft_npatterns, conf->ft_patterns);
1562
1563  if (status <= 0) {
1564  return(0);
1565  }
1566  }
1567
      /* each configured time column is passed on by name with its patterns */
1568  if (conf->time_ncols) {
1569
1570  for (i = 0; i < conf->time_ncols; ++i) {
1571
1572  tc = &(conf->time_cols[i]);
1573
1575  csv, tc->name, tc->npatterns, tc->patterns);
1576
1577  if (status <= 0) {
1578  return(0);
1579  }
1580  }
1581  }
1582
1583  return(1);
1584 }
1585 
1586 /**
1587  * Create a CSV2CDS Map.
1588  *
      * One map entry is produced per field map entry in the conf structure
      * or, when the conf structure has no field maps, per named CSV column
      * that is not a time column. The output variable name of an entry is
      * chosen in this order: the out_name given in the field map, the name
      * of the dataset variable at the same map index (only when the number
      * of non-time CSV columns does not exceed the number of dataset
      * variables), or the CSV column name with every non-alphanumeric
      * character replaced by an underbar.
      *
      * The array is allocated zero-filled with csv->nfields + 1 entries, so
      * unused entries have NULL members.
      *
1589  * The memory used by the returned CSV2CDS Map is dynamically allocated
1590  * and must be freed using the dsproc_free_csv_to_cds_map().
1591  *
1592  * If an error occurs in this function it will be appended to the log and
1593  * error mail messages, and the process status will be set appropriately.
1594  *
1595  * @param conf pointer to the CSVConf structure
1596  * @param csv pointer to the CSVParser structure
1597  * @param cds pointer to the CDSGroup structure
1598  * @param flags reserved for control flags
1599  *
1600  * @retval map pointer to the CSV2CDS Map structure
1601  * @retval NULL if an error occurred
1602  */
1604  CSVConf *conf,
1605  CSVParser *csv,
1606  CDSGroup *cds,
1607  int flags)
1608 {
1609  CSV2CDSMap *maps;
1610  CSV2CDSMap *map;
1611  CSVFieldMap *field_map;
1612  int max_fields;
1613  char *strp;
1614  int csv_nvars;
1615  int cds_nvars;
1616  CDSVar **cds_vars;
1617  int nmissings;
1618  const char **missings;
1619  int fi, mi, mvi, ti;
1620
      /* self-assignment; presumably silences the unused-parameter warning */
1621  flags = flags;
1622
1623  /* Allocate memory for the variable data map */
1624
1625  max_fields = csv->nfields;
1626
1627  maps = (CSV2CDSMap *)calloc(max_fields + 1, sizeof(CSV2CDSMap));
1628  if (!maps) goto MEMORY_ERROR;
1629
1630  /* Get the array of variables in the CDSGroup */
      /* NOTE(review): the returned count is used without an error check --
      * confirm what dsproc_get_dataset_vars() returns on failure. */
1631
1632  cds_nvars = dsproc_get_dataset_vars(
1633  cds, NULL, 0, &cds_vars, NULL, NULL);
1634
1635  /* Count the number of columns in the CSV file,
1636  * skipping the time columns. */
      /* NOTE(review): csv->headers[fi] is passed to strcmp here without a
      * NULL check, although the column loop further down treats NULL header
      * names as possible -- confirm headers are never NULL at this point. */
1637
1638  csv_nvars = 0;
1639
1640  for (fi = 0; fi < csv->nfields; ++fi) {
1641
1642  for (ti = 0; ti < csv->ntc; ++ti) {
1643
1644  if (strcmp(csv->tc_names[ti], csv->headers[fi]) == 0) {
1645  break;
1646  }
1647  }
1648
      /* ti stops short of ntc only when a time column name matched */
1649  if (ti != csv->ntc) continue;
1650
1651  csv_nvars += 1;
1652  }
1653
1654  /* Check for field map entries in the conf file */
1655
1656  if (conf->field_maps) {
1657
1658  /* Loop over field map entries */
      /* NOTE(review): maps holds csv->nfields + 1 entries, yet this loop
      * fills one entry per conf field map with no visible bound on mi --
      * confirm field_nmaps cannot exceed csv->nfields. */
1659
1660  mi = 0;
1661
1662  for (fi = 0; fi < conf->field_nmaps; ++fi) {
1663
1664  map = &(maps[mi]);
1665  field_map = &(conf->field_maps[fi]);
1666
1667  map->csv_name = strdup(field_map->col_name);
1668  if (!map->csv_name) goto MEMORY_ERROR;
1669
1670  /* Check if an output variable name was specified */
1671
1672  if (field_map->out_name) {
1673  map->cds_name = strdup(field_map->out_name);
1674  if (!map->cds_name) goto MEMORY_ERROR;
1675  }
1676  else if (csv_nvars <= cds_nvars) {
1677
1678  /* use variable name at same map index */
      /* NOTE(review): this relies on mi < cds_nvars, but only
      * csv_nvars <= cds_nvars is tested here -- confirm. */
1679
1680  map->cds_name = strdup(cds_vars[mi]->name);
1681  if (!map->cds_name) goto MEMORY_ERROR;
1682  }
1683  else {
1684
1685  /* use column name as output variable name */
1686
1687  map->cds_name = strdup(field_map->col_name);
1688  if (!map->cds_name) goto MEMORY_ERROR;
1689
1690 // BDE: Need update to append _ to front of name if it begins with a number
1691
1692  /* change all non-alphanumeric characters to underbars */
      /* NOTE(review): *strp is a plain char; the ctype functions expect a
      * value representable as unsigned char -- consider a cast. */
1693
1694  strp = (char *)map->cds_name;
1695  while (*strp) {
1696  if (!isalnum(*strp)) *strp = '_';
1697  ++strp;
1698  }
1699  }
1700
1701  /* Check if units were specified */
1702
1703  if (field_map->units) {
1704  map->csv_units = strdup(field_map->units);
1705  if (!map->csv_units) goto MEMORY_ERROR;
1706  }
1707
1708  /* Check if any missing values were specified */
1709
1710  if (field_map->nmissings) {
1711
1712  nmissings = field_map->nmissings;
1713  missings = field_map->missings;
1714
      /* one extra zeroed slot leaves a NULL pointer after the last
      * copied string */
      /* NOTE(review): the calloc result is not checked before it is
      * written to in the loop below. */
1715  map->csv_missings = calloc(nmissings + 1, sizeof(const char *));
1716
1717  for (mvi = 0; mvi < nmissings; ++mvi) {
1718  map->csv_missings[mvi] = strdup(missings[mvi]);
1719  if (!map->csv_missings[mvi]) goto MEMORY_ERROR;
1720  }
1721  }
1722
1723  ++mi;
1724  }
1725  }
1726  else {
1727
1728  /* Loop over all csv columns */
1729
1730  mi = 0;
1731
1732  for (fi = 0; fi < csv->nfields; ++fi) {
1733
1734  /* skip time columns */
1735
1736  for (ti = 0; ti < csv->ntc; ++ti) {
1737  if (strcmp(csv->tc_names[ti], csv->headers[fi]) == 0) {
1738  break;
1739  }
1740  }
1741
1742  if (ti != csv->ntc) continue;
1743
1744  /* skip columns with NULL header names */
      /* NOTE(review): this check runs after the strcmp above has already
      * been given csv->headers[fi]. */
1745
1746  if (csv->headers[fi] == NULL ||
1747  *csv->headers[fi] == '\0') {
1748
1749  continue;
1750  }
1751
1752  /* set the csv column name */
1753
1754  map = &(maps[mi]);
1755
1756  map->csv_name = strdup(csv->headers[fi]);
1757  if (!map->csv_name) goto MEMORY_ERROR;
1758
1759  /* check if we should map by index */
1760
1761  if (csv_nvars <= cds_nvars) {
1762
1763  /* use variable name at same map index */
1764
1765  map->cds_name = strdup(cds_vars[mi]->name);
1766  if (!map->cds_name) goto MEMORY_ERROR;
1767  }
1768  else {
1769
1770  /* use column name as output variable name */
1771
1772  map->cds_name = strdup(csv->headers[fi]);
1773  if (!map->cds_name) goto MEMORY_ERROR;
1774
1775 // BDE: Need update to append _ to front of name if it begins with a number
1776
1777  /* change all non-alphanumeric characters to underbars */
      /* NOTE(review): same plain-char argument to isalnum as in the
      * field map branch above. */
1778
1779  strp = (char *)map->cds_name;
1780  while (*strp) {
1781  if (!isalnum(*strp)) *strp = '_';
1782  ++strp;
1783  }
1784  }
1785
1786  ++mi;
1787  }
1788  }
1789
1790  return(maps);
1791
1792 MEMORY_ERROR:
1793
1795
1797  "Memory allocation error creating CSV2CDSMap structure\n");
1798
1800
1801  return((CSV2CDSMap *)NULL);
1802 }
1803 
1804 /**
1805  * Free memory used by a CSVConf structure.
1806  *
      * A NULL conf is ignored. Otherwise this releases the identification
      * strings (proc, site, fac, name, level), the file name and path, every
      * search path and the list holding them, the directory list, the header
      * line and tag, the split interval, and finally the structure itself.
      * The pointer must not be used after this call.
      *
1807  * @param conf pointer to CSVConf structure
1808  */
1810 {
1811  int i;
1812
1813  if (conf) {
1814
1815  if (conf->proc) free((void *)conf->proc);
1816  if (conf->site) free((void *)conf->site);
1817  if (conf->fac) free((void *)conf->fac);
1818  if (conf->name) free((void *)conf->name);
1819  if (conf->level) free((void *)conf->level);
1820
1821  if (conf->file_name) free((void *)conf->file_name);
1822  if (conf->file_path) free((void *)conf->file_path);
1823
      /* free the path strings before the list that holds them */
1824  if (conf->search_paths) {
1825  for (i = 0; i < conf->search_npaths; ++i) {
1826  if (conf->search_paths[i]) free((void *)conf->search_paths[i]);
1827  }
1828  free((void *)conf->search_paths);
1829  }
1830
1831  if (conf->dirlist) dirlist_free(conf->dirlist);
1832
1835
1836  if (conf->header_line) free((void *)conf->header_line);
1837  if (conf->header_tag) free((void *)conf->header_tag);
1838
1841
1842  if (conf->split_interval) free((void *)conf->split_interval);
1843
1844  free(conf);
1845  }
1846 }
1847 
1848 /**
1849  * Free the memory used by a CSV2CDS Map.
1850  *
      * A NULL map is ignored. The array is walked up to the first entry whose
      * csv_name is NULL, so it must end with such an entry. Each csv_missings
      * list is likewise walked up to its first NULL pointer.
      *
1851  * @param map pointer to the CSV2CDS Map structure
1852  */
1854 {
1855  int fi, mvi;
1856
1857  if (map) {
1858
      /* a NULL csv_name marks the end of the array */
1859  for (fi = 0; map[fi].csv_name; ++fi) {
1860
1861  free((void *)map[fi].cds_name);
1862  free((void *)map[fi].csv_name);
1863  free((void *)map[fi].csv_units);
1864
1865  if (map[fi].csv_missings) {
1866
      /* the missing value list ends at its first NULL pointer */
1867  for (mvi = 0; map[fi].csv_missings[mvi]; ++mvi) {
1868  free((void *)map[fi].csv_missings[mvi]);
1869  }
1870
1871  free(map[fi].csv_missings);
1872  }
1873  }
1874
1875  free(map);
1876  }
1877 }
1878 
1879 /**
1880  * Initialize a new CSVConf structure.
1881  *
      * The structure is allocated zero-filled and then given copies of the
      * values returned by dsproc_get_name(), dsproc_get_site() and
      * dsproc_get_facility(), plus copies of `name` and, when it is not NULL,
      * `level`.
      *
1882  * The memory used by the returned structure is dynamically allocated
1883  * and must be freed using dsproc_free_csv_conf().
1884  *
1885  * If an error occurs in this function it will be appended to the log and
1886  * error mail messages, and the process status will be set appropriately.
1887  *
1888  * @param name the base name of the conf file
1889  * @param level the data level of the conf file, or NULL
1890  *
1891  * @retval conf pointer to the CSVConf structure
1892  * @retval NULL if an error occurred
1893  */
1895  const char *name,
1896  const char *level)
1897 {
1898  const char *proc = dsproc_get_name();
1899  const char *site = dsproc_get_site();
1900  const char *fac = dsproc_get_facility();
1901
1902  CSVConf *conf = (CSVConf *)calloc(1, sizeof(CSVConf));
1903
      /* The || chain stops at the first failed allocation, so members after
      * it keep the NULL value calloc gave them.
      *
      * NOTE(review): proc, site, fac and name are handed to strdup without a
      * NULL check -- confirm they can never be NULL here. */
1904  if (!conf ||
1905  !(conf->proc = strdup(proc)) ||
1906  !(conf->site = strdup(site)) ||
1907  !(conf->fac = strdup(fac)) ||
1908  !(conf->name = strdup(name)) ||
1909  (level && !(conf->level = strdup(level)))) {
1910
      /* release whatever was allocated before the failure */
1911  if (conf) {
1912  dsproc_free_csv_conf(conf);
1913  }
1914
1916  "Memory allocation error initializing CSV configuration structure\n");
1917
1919
1920  return((CSVConf *)NULL);
1921  }
1922
1923  return(conf);
1924 }
1925 
1926 /**
1927  * Load the CSV Configuration file into a CSVConf structure.
1928  *
1929  * The first time this function is called the data_time argument must be
1930  * set to 0. This will load the main conf file containing the file
1931  * name patterns and all default configuration settings. It will also set
1932  * the path to look for time varying conf files in subsequent calls to
1933  * this function.
1934  *
      * The file is located with _csv_find_conf_file(). If its path and name
      * equal the ones already stored in the structure nothing is reloaded.
      * Otherwise the file is read with _csv_load_conf_file(), the structure
      * is printed to stdout when msngr_debug_level is non-zero, and the new
      * path and name are stored in the structure.
      *
1935  * If an error occurs in this function it will be appended to the log and
1936  * error mail messages, and the process status will be set appropriately.
1937  *
1938  * @param conf Pointer to the CSVConf structure.
1939  *
1940  * @param data_time The start time of the data being processed, this should
1941  * be determined from the file name. Specify 0 to find the
1942  * main conf file containing the file name patterns
1943  * and all default configuration settings.
1944  *
1945  * @param flags Control Flags:
1946  *
1947  * - CSV_CHECK_DATA_CONF check for config files under the root directory
1948  * defined by the CONF_DATA environment variable.
1949  *
1950  * @retval 1 if successful
1951  * @retval 0 if a file was not found, or it has already been loaded
1952  * @retval -1 if error occurred
1953  */
1954 int dsproc_load_csv_conf(CSVConf *conf, time_t data_time, int flags)
1955 {
1956  char path[PATH_MAX];
1957  char name[PATH_MAX];
1958  int status;
1959
      /* a status <= 0 from the search is passed straight back to the caller */
1960  status = _csv_find_conf_file(conf, data_time, flags, path, name);
1961  if (status <= 0) return(status);
1962
1963  /* Check if this file has already been loaded */
1964
1965  if ((conf->file_path && conf->file_name) &&
1966  (strcmp(conf->file_path, path) == 0) &&
1967  (strcmp(conf->file_name, name) == 0) ) {
1968
1969  return(0);
1970  }
1971
1972  /* Read in the configuration file */
1973
1974  if (!_csv_load_conf_file(conf, path, name, 0)) {
1975  return(-1);
1976  }
1977
1978  if (msngr_debug_level) {
1979  dsproc_print_csv_conf(stdout, conf);
1980  }
1981
1982  /* Set the configuration file path and name in the structure */
      /* NOTE(review): each old value is freed before its replacement is
      * duplicated, so a failed strdup leaves that member NULL. */
1983
1984  if (conf->file_path) free((void *)conf->file_path);
1985  conf->file_path = strdup(path);
1986  if (!conf->file_path) goto MEMORY_ERROR;
1987
1988  if (conf->file_name) free((void *)conf->file_name);
1989  conf->file_name = strdup(name);
1990  if (!conf->file_name) goto MEMORY_ERROR;
1991
1992  return(1);
1993
1994 MEMORY_ERROR:
1995
1997  "Memory allocation error initializing CSV configuration structure\n");
1998
2000
2001  return(-1);
2002 }
2003 
2004 /**
2005  * Print the contents of a CSVConf structure.
2006  *
2007  * If an error occurs in this function it will be appended to the log and
2008  * error mail messages, and the process status will be set appropriately.
2009  *
2010  * @param fp pointer to the output stream
2011  * @param conf pointer to the CSVConf structure
2012  *
2013  * @retval 1 if successful
2014  * @retval 0 if successful
2015  */
2016 int dsproc_print_csv_conf(FILE *fp, CSVConf *conf)
2017 {
2018  CSVTimeCol *time_col;
2019  CSVFieldMap *map;
2020  int i, j;
2021 
2022  fprintf(fp, "CSV Configuration Structure\n\n");
2023 
2024  if (conf->fn_patterns) {
2025 
2026  fprintf(fp, "FILE_NAME_PATTERNS:\n\n");
2027 
2028  for (i = 0; i < conf->fn_npatterns; ++i) {
2029  fprintf(fp, " %s\n", conf->fn_patterns[i]);
2030  }
2031 
2032  fprintf(fp, "\n");
2033  }
2034 
2035  if (conf->ft_patterns) {
2036 
2037  fprintf(fp, "FILE_TIME_PATTERNS:\n\n");
2038 
2039  for (i = 0; i < conf->ft_npatterns; ++i) {
2040  fprintf(fp, " %s\n", conf->ft_patterns[i]);
2041  }
2042 
2043  fprintf(fp, "\n");
2044  }
2045 
2046  if (conf->delim) {
2047  fprintf(fp, "DELIMITER:\n\n '%c'\n\n", conf->delim);
2048  }
2049 
2050  if (conf->header_line) {
2051  fprintf(fp, "HEADER_LINE:\n\n %s\n\n", conf->header_line);
2052  }
2053 
2054  if (conf->header_tag) {
2055  fprintf(fp, "HEADER_LINE_TAG:\n\n %s\n\n", conf->header_tag);
2056  }
2057 
2058  if (conf->header_linenum) {
2059  fprintf(fp, "HEADER_LINE_NUMBER:\n\n %d\n\n", conf->header_linenum);
2060  }
2061 
2062  if (conf->header_nlines) {
2063  fprintf(fp, "NUMBER_OF_HEADER_LINES:\n\n %d\n\n", conf->header_nlines);
2064  }
2065 
2066  if (conf->exp_ncols) {
2067  fprintf(fp, "NUMBER_OF_COLUMNS:\n\n %d\n\n", conf->exp_ncols);
2068  }
2069 
2070  if (conf->time_cols) {
2071 
2072  fprintf(fp, "TIME_COLUMN_PATTERNS:\n\n");
2073 
2074  for (i = 0; i < conf->time_ncols; ++i) {
2075 
2076  time_col = &(conf->time_cols[i]);
2077 
2078  fprintf(fp, " %s:", time_col->name);
2079 
2080  if (time_col->npatterns) {
2081 
2082  fprintf(fp, " %s", time_col->patterns[0]);
2083 
2084  for (j = 1; j < time_col->npatterns; ++j) {
2085  fprintf(fp, ", %s", time_col->patterns[j]);
2086  }
2087  }
2088  fprintf(fp, "\n");
2089  }
2090 
2091  fprintf(fp, "\n");
2092  }
2093 
2094  if (conf->split_interval) {
2095  fprintf(fp, "SPLIT_INTERVAL:\n\n %s\n\n", conf->split_interval);
2096  }
2097 
2098  if (conf->field_maps) {
2099 
2100  fprintf(fp, "FIELD_MAP:\n\n");
2101 
2102  for (i = 0; i < conf->field_nmaps; ++i) {
2103 
2104  map = &(conf->field_maps[i]);
2105 
2106  if (map->out_name)
2107  fprintf(fp, " %s:", map->out_name);
2108  else
2109  fprintf(fp, " :");
2110 
2111  if (map->col_name)
2112  fprintf(fp, " %s", map->col_name);
2113 
2114  if (map->units)
2115  fprintf(fp, ", %s", map->units);
2116 
2117  if (map->nmissings == 1) {
2118  fprintf(fp, ", %s", map->missings[0]);
2119  }
2120  else if (map->nmissings > 1) {
2121 
2122  fprintf(fp, " \"%s", map->missings[0]);
2123 
2124  for (j = 1; j < map->nmissings; ++j) {
2125  fprintf(fp, ", %s", map->missings[j]);
2126  }
2127 
2128  fprintf(fp, "\"");
2129  }
2130 
2131  fprintf(fp, "\n");
2132  }
2133 
2134  fprintf(fp, "\n");
2135  }
2136 
2137  return(1);
2138 }