48 "NUMBER_OF_HEADER_LINES",
50 "TIME_COLUMN_PATTERNS",
/**
 *  Parse the time stamp embedded in a configuration file name.
 *
 *  The lines shown find the last '.' in the name, step back over two more
 *  '.' delimited fields, and read the text found there using the layout
 *  YYYYMMDD.hhmmss.
 *
 *  @param  file_name  name of the configuration file
 *
 *  @return a time_t; how the parsed fields are converted is not visible
 *          in this excerpt
 */
64 static time_t _csv_get_conf_file_name_time(
const char *file_name)
66 int YYYY, MM, DD, hh, mm, ss;
/* start at the last '.' in the name */
/* NOTE(review): no NULL check of the strrchr() result is visible here -- confirm */
79 chrp = strrchr(file_name,
'.');
/* walk backwards over two more fields, stopping early at the start of the name */
82 for (count = 0; count < 2; ++count) {
83 if (chrp != file_name) {
84 for (--chrp; *chrp !=
'.' && chrp != file_name; --chrp);
/* step past the '.' so chrp points at the first character of the field */
88 if (*chrp ==
'.') ++chrp;
/* zero the fields first: the sscanf() result is not checked on the lines
 * shown, so any field that is not matched keeps the value 0 */
90 YYYY = MM = DD = hh = mm = ss = 0;
92 sscanf(chrp,
"%4d%2d%2d.%2d%2d%2d", &YYYY, &MM, &DD, &hh, &mm, &ss);
/**
 *  Build the list of directories to search for CSV configuration files.
 *
 *  The lines shown read the CONF_DATA and INGEST_HOME environment
 *  variables and create up to two entries, in this order:
 *
 *    - $CONF_DATA/<conf->proc>
 *    - $INGEST_HOME/conf/ingest/<conf->proc>
 *
 *  An entry is skipped when its environment variable is not available.
 *
 *  @param  conf    configuration structure; conf->proc supplies the last
 *                  path component
 *  @param  flags   control flags (their use is not visible in this excerpt)
 *  @param  npaths  presumably receives the number of paths created -- TODO confirm
 *  @param  paths   presumably receives the list of paths -- TODO confirm
 *
 *  @return callers in this file test the result with '!' and treat zero as
 *          failure; the return statements are not visible in this excerpt
 */
116 static int _csv_get_conf_search_paths(
CSVConf *conf,
int flags,
int *npaths,
const char ***paths)
118 const char *proc_name = conf->
proc;
119 const char *conf_data = (
const char *)NULL;
120 const char *ingest_home;
149 ingest_home = getenv(
"INGEST_HOME");
153 conf_data = getenv(
"CONF_DATA");
/* NOTE(review): the conditions that pair these two error branches are only
 * partly visible; they may depend on flags -- confirm */
155 if (!conf_data && !ingest_home) {
158 "Could not create configuration file search paths:\n"
159 " -> environment variables CONF_DATA and INGEST_HOME do not exist");
166 else if (!ingest_home) {
169 "Could not create configuration file search paths:\n"
170 " -> environment variable INGEST_HOME does not exist");
/* room for the two possible entries; calloc leaves unused slots NULL */
177 search_paths = (
char **)calloc(2,
sizeof(
char *));
178 if (!search_paths)
goto MEMORY_ERROR;
182 for (pi = 0; pi < 2; ++pi) {
185 if (!conf_data)
continue;
186 snprintf(path, PATH_MAX,
"%s/%s", conf_data, proc_name);
189 if (!ingest_home)
continue;
190 snprintf(path, PATH_MAX,
"%s/conf/ingest/%s", ingest_home, proc_name);
193 search_paths[search_npaths] = strdup(path);
194 if (!search_paths[search_npaths])
goto MEMORY_ERROR;
/* Release every path duplicated so far.  The loop index must be used here:
 * search_paths[search_npaths] is the slot that was never filled, so indexing
 * with it freed nothing and leaked all earlier entries. */
210 for (pi = 0; pi < search_npaths; ++pi) {
211 free(search_paths[pi]);
218 "Memory allocation error creating list of configuration file search paths\n");
/**
 *  Locate the CSV configuration file to use for a given data time.
 *
 *  From the lines shown:
 *
 *    - the search directories come from _csv_get_conf_search_paths()
 *    - for the main (not time varying) file each directory is probed with
 *      access() for these names:
 *        <site><name><fac>.<level>.csv_conf
 *        <name>.<level>.csv_conf
 *        <site><name><fac>.csv_conf
 *        <name>.csv_conf
 *    - time varying files are matched with a regular expression ending in
 *      \.[0-9]{8}\.[0-9]{6}\.csv_conf, and the list is scanned from the last
 *      entry backwards for the first file whose name time is not after
 *      data_time
 *
 *  @param  conf       configuration structure (site, fac, name and level
 *                     are read from it)
 *  @param  data_time  time of the data being processed
 *  @param  flags      control flags, passed on to _csv_get_conf_search_paths()
 *  @param  path       output: directory of the selected file; written with
 *                     a PATH_MAX limit
 *  @param  name       output: name of the selected file; written with a
 *                     PATH_MAX limit
 *
 *  @return the caller visible later in this file returns immediately when
 *          the result is <= 0; the return statements themselves are not
 *          visible in this excerpt
 */
259 static int _csv_find_conf_file(
CSVConf *conf, time_t data_time,
int flags,
char *path,
char *name)
261 const char *site = conf->
site;
262 const char *fac = conf->
fac;
263 const char *base_name = conf->
name;
264 const char *level = conf->
level;
267 const char **search_paths;
269 char full_path[PATH_MAX];
270 char pattern[PATH_MAX];
271 const char *patternp;
/* NOTE(review): the exact extent of this branch is not visible in this excerpt */
284 if (data_time == 0) {
288 if (!_csv_get_conf_search_paths(conf, flags, &search_npaths, &search_paths)) {
294 for (pi = 0; pi < search_npaths; ++pi) {
/* snprintf() rather than strncpy(): the copy is always NUL-terminated */
296 snprintf(path, PATH_MAX, "%s", search_paths[pi]);
299 "Checking for main csv_conf file in: %s\n", path);
/* a missing directory is skipped; any other access() failure is reported */
301 if (access(path, F_OK) != 0) {
303 if (errno != ENOENT) {
306 "Could not access directory: %s\n"
307 " -> %s\n", path, strerror(errno));
315 " - path does not exist\n");
322 for (ni = 0; ni < 2; ++ni) {
326 snprintf(name, PATH_MAX,
"%s%s%s.%s.csv_conf", site, base_name, fac, level);
329 snprintf(name, PATH_MAX,
"%s.%s.csv_conf", base_name, level);
334 snprintf(name, PATH_MAX,
"%s%s%s.csv_conf", site, base_name, fac);
337 snprintf(name, PATH_MAX,
"%s.csv_conf", base_name);
343 snprintf(full_path, PATH_MAX,
"%s/%s", path, name);
345 if (access(full_path, F_OK) == 0) {
350 else if (errno == ENOENT) {
356 "Could not access file: %s\n"
357 " -> %s\n", full_path, strerror(errno));
366 if (found_file)
break;
384 "Could not get list of configuration files in: %s\n",
392 snprintf(path, PATH_MAX, "%s", dirlist->path);
/* Bound the writes into pattern[PATH_MAX]: site, name, fac and level are
 * caller supplied strings of arbitrary length. */
397 snprintf(pattern, PATH_MAX,
"^%s%s%s\\.%s\\.[0-9]{8}\\.[0-9]{6}\\.csv_conf", site, base_name, fac, level);
400 snprintf(pattern, PATH_MAX,
"^%s%s%s\\.[0-9]{8}\\.[0-9]{6}\\.csv_conf", site, base_name, fac);
403 patternp = &(pattern[0]);
405 if (!_csv_get_conf_search_paths(conf, flags, &search_npaths, &search_paths)) {
411 for (pi = 0; pi < search_npaths; ++pi) {
413 snprintf(path, PATH_MAX, "%s", search_paths[pi]);
416 "Checking for time varying csv_conf files in: %s\n", path);
418 if (access(path, F_OK) != 0) {
420 if (errno != ENOENT) {
423 "Could not access directory: %s\n"
424 " -> %s\n", path, strerror(errno));
432 " - path does not exist\n");
446 "Could not create configuration files list for: %s\n",
459 "Could not get configuration files list for: %s\n",
467 else if (nfiles == 0) {
486 "Looking for csv_conf file for data time: %s\n",
/* scan from the last entry backwards and stop at the first file whose name
 * time is not after data_time */
/* NOTE(review): presumably file_list is sorted by name -- confirm */
489 for (fi = nfiles - 1; fi > -1; --fi) {
490 file_time = _csv_get_conf_file_name_time(file_list[fi]);
491 if (data_time >= file_time)
break;
/* NOTE(review): fi is -1 when no file qualifies; the guard for that case is
 * not visible in this excerpt -- confirm */
499 snprintf(name, PATH_MAX, "%s", file_list[fi]);
/**
 *  Split a string at a delimiter character.
 *
 *  Only part of the body is visible in this excerpt: the function returns
 *  NULL when delimp is NULL, and it overwrites trailing white-space with
 *  NUL bytes while moving endp backwards towards strp.
 *
 *  @param  strp   string to split, modified in place
 *  @param  delim  delimiter character
 *
 *  @return NULL when delimp is NULL; callers in this file use the result as
 *          the start of the text that follows (other return paths are not
 *          visible here)
 */
525 static char *_csv_split_delim(
char *strp,
char delim)
/* NOTE(review): presumably delimp is the position of delim in strp -- confirm */
531 if (!delimp)
return((
char *)NULL);
/* overwrite trailing white-space, never moving back to or past strp */
536 while (endp > strp && isspace(*endp)) *endp-- =
'\0';
/**
 *  Strip '#' comments from the in-memory text of a configuration file.
 *
 *  The lines shown walk the buffer with two cursors that both start at
 *  file_data.  Text that begins with a single or double quote gets its own
 *  inner scan, and a '#' seen outside that scan skips forward to the next
 *  newline or to the end of the buffer.
 *
 *  @param  file_data  NUL-terminated file contents
 */
554 static void _csv_strip_comments(
char *file_data)
/* NOTE(review): presumably cp1 is the write position and cp2 the read
 * position of an in-place copy -- confirm, the copy statements are not visible */
556 char *cp1 = file_data;
557 char *cp2 = file_data;
560 while (*cp2 !=
'\0') {
/* quoted text is scanned separately */
562 if (*cp2 ==
'"' || *cp2 ==
'\'') {
570 while (*cp2 !=
'\0') {
/* looks one character ahead for the quote character */
574 if (*(cp2+1) == quote) {
585 else if (*cp2 ==
'#') {
/* skip everything up to, but not including, the newline or the NUL */
589 for (++cp2; *cp2 !=
'\n' && *cp2 !=
'\0'; ++cp2);
/**
 *  Trim trailing white-space (including end-of-line characters) from a string.
 *
 *  Every trailing character for which isspace() is true is overwritten
 *  with a NUL byte.  An empty or all white-space string ends up empty.
 *
 *  @param  linep  NUL-terminated string, modified in place
 */
static void _csv_trim_eol(char *linep)
{
    size_t length = strlen(linep);

    /* Work from a length rather than from linep + length - 1: for an empty
     * string that expression points in front of the buffer, which is
     * undefined even when it is never dereferenced.  The cast keeps
     * isspace() defined for bytes with the high bit set. */
    while (length > 0 && isspace((unsigned char)linep[length - 1])) {
        linep[--length] = '\0';
    }
}
/**
 *  Remove a matching pair of surrounding quotes from a string.
 *
 *  When the first character is a single or double quote and the last
 *  character is the same quote, the last character is overwritten with a
 *  NUL byte.
 *
 *  @param  linep  NUL-terminated string, modified in place
 *
 *  @return pointer to the text to use; callers in this file assign it back
 *          to their line pointer (the return statements are not visible in
 *          this excerpt)
 */
616 static char *_csv_trim_quotes(
char *linep)
618 size_t length = strlen(linep);
/* for an empty string the first test fails, so linep[length-1] is not evaluated */
622 if ((*linep ==
'"' || *linep ==
'\'') &&
623 linep[length-1] == *linep) {
/* drop the closing quote */
625 linep[length-1] =
'\0';
/**
 *  Load one CSV configuration file into the configuration structure.
 *
 *  From the lines shown: the file <path>/<name> is read completely into a
 *  NUL-terminated buffer, comments are stripped, and the text is processed
 *  one line at a time.  A line that starts with a letter is compared
 *  against the entries of _ConfKeys; the text that follows is handled
 *  according to the current keyword.
 *
 *  The parameter list is not visible in this excerpt; the call visible
 *  later in this file passes (conf, path, name, 0).
 *
 *  @return 1 for an empty file; the caller treats zero as failure (other
 *          return statements are not visible in this excerpt)
 */
647 static int _csv_load_conf_file(
653 char full_path[PATH_MAX];
654 struct stat file_stats;
683 snprintf(full_path, PATH_MAX,
"%s/%s", path, name);
/* the file size from stat() sizes the read buffer */
687 if (stat(full_path, &file_stats) < 0) {
690 "Could not get file stats for conf file: %s\n"
691 " -> %s\n", full_path, strerror(errno));
700 nbytes = file_stats.st_size;
/* an empty file is reported as success */
701 if (nbytes == 0)
return(1);
/* one extra byte for the terminating NUL */
703 file_data = (
char *)malloc((nbytes + 1) *
sizeof(char));
707 "Memory allocation error loading conf file: %s\n",
715 fp = fopen(full_path,
"r");
719 "Could not open file: %s\n"
720 " -> %s\n", full_path, strerror(errno));
728 nread = fread(file_data, 1, nbytes, fp);
/* NOTE(review): errno is not necessarily set by a short fread(), so the
 * strerror() text below may be unrelated -- confirm */
731 if (nread != nbytes) {
734 "Could not read conf file: %s\n"
735 " -> %s\n", full_path, strerror(errno));
743 file_data[nbytes] =
'\0';
747 _csv_strip_comments(file_data);
/* NOTE(review): presumably buffer holds pointers to the values parsed from
 * one entry -- confirm */
752 buffer = calloc(buflen,
sizeof(
char *));
753 if (!buffer)
goto MEMORY_ERROR;
/* one iteration per line; eol is set on lines not visible in this excerpt */
761 for (linep = file_data; *linep !=
'\0'; linep = eol + 1) {
/* carriage return in front of the line end */
/* NOTE(review): eol - 1 is in front of the buffer if eol == file_data;
 * a guard is not visible here -- confirm */
778 if (*(eol - 1) ==
'\r') {
789 _csv_trim_eol(linep);
793 if (*linep ==
'\0') {
/* only a line starting with a letter is compared against the keywords */
799 if (isalpha(*linep)) {
801 linelen = strlen(linep);
806 if (keylen > linelen)
continue;
/* the keyword must be followed by white-space, ':', '=' or the line end */
809 if (isspace(chr) || chr ==
':' || chr ==
'=' || chr ==
'\0') {
810 if (strncmp(linep,
_ConfKeys[ki], keylen) == 0) {
/* skip the separators between the keyword and its value */
824 while (isspace(*linep) || *linep ==
':' || *linep ==
'=') ++linep;
829 "Invalid keyword found on line %d in file: %s\n"
831 linenum, full_path, linep);
843 while (isspace(*linep)) ++linep;
848 if (*linep ==
'\0') {
858 "Invalid configuration file: %s\n"
859 " -> keyword not found before first line of text\n",
/* dispatch on the current keyword */
870 if (strcmp(key,
"FILE_NAME_PATTERNS") == 0) {
/* NOTE(review): assigning the realloc() result straight to buffer loses the
 * old block on failure, and the cleanup below then sees NULL; the same
 * pattern repeats in the branches that follow -- confirm */
876 if (buflen < count) {
877 buffer = realloc(buffer, count *
sizeof(
char *));
878 if (!buffer)
goto MEMORY_ERROR;
889 else if (strcmp(key,
"FILE_TIME_PATTERNS") == 0) {
894 if (buflen < count) {
895 buffer = realloc(buffer, count *
sizeof(
char *));
896 if (!buffer)
goto MEMORY_ERROR;
907 else if (strcmp(key,
"DELIMITER") == 0) {
909 linep = _csv_trim_quotes(linep);
/* the two characters '\' and 't' get special handling on lines not visible here */
910 if (*linep ==
'\\' && *(linep+1) ==
't') {
914 conf->
delim = *linep;
917 else if (strcmp(key,
"HEADER_LINE") == 0) {
931 else if (strcmp(key,
"HEADER_LINE_TAG") == 0) {
933 linep = _csv_trim_quotes(linep);
939 else if (strcmp(key,
"HEADER_LINE_NUMBER") == 0) {
941 linep = _csv_trim_quotes(linep);
944 else if (strcmp(key,
"NUMBER_OF_HEADER_LINES") == 0) {
946 linep = _csv_trim_quotes(linep);
949 else if (strcmp(key,
"NUMBER_OF_COLUMNS") == 0) {
951 linep = _csv_trim_quotes(linep);
954 else if (strcmp(key,
"TIME_COLUMN_PATTERNS") == 0) {
/* entry format is "name: pattern(s)"; split at the ':' */
959 linep = _csv_split_delim(linep,
':');
961 if (!linep || *linep ==
'\0') {
964 "Invalid time column format found on line %d in file: %s\n"
965 " -> expected format: name: pattern(s)\n",
976 if (buflen < count) {
977 buffer = realloc(buffer, count *
sizeof(
char *));
978 if (!buffer)
goto MEMORY_ERROR;
989 else if (strcmp(key,
"SPLIT_INTERVAL") == 0) {
991 linep = _csv_trim_quotes(linep);
997 else if (strcmp(key,
"FIELD_MAP") == 0) {
/* the output name comes first; split at the ':' */
1002 linep = _csv_split_delim(linep,
':');
1004 if (!linep || *linep ==
'\0') {
1007 "Invalid field map format found on line %d in file: %s\n"
1008 " -> expected format: dod_var_name: csv column name [, csv units [, csv missing value string]]\n",
1009 linenum, full_path);
1018 if (buflen < count) {
1019 buffer = realloc(buffer, count *
sizeof(
char *));
1020 if (!buffer)
goto MEMORY_ERROR;
/* buffer[0] is passed on its own, the remaining count - 1 entries as a list */
1027 conf, out_name, buffer[0], count - 1, (
const char **)(buffer + 1))) {
1041 if (buffer) free(buffer);
1047 if (buffer) free(buffer);
1050 "Memory allocation error loading CSV configuration file: %s/%s\n",
1083 const char *out_name,
1084 const char *col_name,
1099 if (out_name && *out_name !=
'\0') {
1103 if (strcmp(map->
out_name, out_name) == 0) {
1130 if (!list)
goto MEMORY_ERROR;
1137 if (out_name && *out_name !=
'\0') {
1139 if (!map->
out_name)
goto MEMORY_ERROR;
1152 if (args[0] && *args[0] !=
'\0') {
1153 map->
units = strdup(args[0]);
1161 if (args[1] && *args[1] !=
'\0') {
1163 map->
missbuf = strdup(args[1]);
1167 map->
missings = calloc(count,
sizeof(
char *));
1168 if (!map->
missings)
goto MEMORY_ERROR;
1180 "Memory allocation error appending an entry to the CSV field map\n");
1203 const char **patterns)
1210 list = (
const char **)realloc(
1211 conf->
fn_patterns, length *
sizeof(
const char *));
1213 if (!list)
goto MEMORY_ERROR;
1217 for (i = conf->
fn_npatterns, j = 0; j < npatterns; ++i, ++j) {
1219 if (patterns[j] && *patterns[j] !=
'\0') {
1231 "Memory allocation error adding CSV file name patterns to configuration structure\n");
1254 const char **patterns)
1263 list = (
const char **)realloc(
1264 conf->
ft_patterns, length *
sizeof(
const char *));
1266 if (!list)
goto MEMORY_ERROR;
1270 for (i = conf->
ft_npatterns, j = 0; j < npatterns; ++i, ++j) {
1272 if (patterns[j] && *patterns[j] !=
'\0') {
1284 "Memory allocation error adding CSV file time patterns to configuration structure\n");
1309 const char **patterns)
1314 const char **strlist;
1325 if (strcmp(time_col->
name, name) == 0) {
1338 if (!list)
goto MEMORY_ERROR;
1345 time_col->
name = strdup(name);
1346 if (!time_col->
name)
goto MEMORY_ERROR;
1353 length = time_col->
npatterns + npatterns;
1354 strlist = (
const char **)realloc(
1355 time_col->
patterns, length *
sizeof(
const char *));
1357 if (!strlist)
goto MEMORY_ERROR;
1361 for (i = time_col->
npatterns, j = 0; j < npatterns; ++i, ++j) {
1363 if (patterns[j] && *patterns[j] !=
'\0') {
1364 time_col->
patterns[i] = strdup(patterns[j]);
1365 if (!time_col->
patterns[i])
goto MEMORY_ERROR;
1375 "Memory allocation error adding CSV time column patterns to configuration structure\n");
1403 length = strlen(conf->
header_line) + strlen(
string) + 1;
1404 header = (
char *)realloc(
1405 (
void *)conf->
header_line, length *
sizeof(char));
1407 if (!header)
goto MEMORY_ERROR;
1411 strcat(header,
string);
1423 "Memory allocation error appending a string to the CSV header line\n");
1520 if (time_col->
name) free((
void *)time_col->
name);
1524 for (j = 0; j < time_col->
npatterns; ++j) {
1618 const char **missings;
1619 int fi, mi, mvi, ti;
1628 if (!maps)
goto MEMORY_ERROR;
1633 cds, NULL, 0, &cds_vars, NULL, NULL);
1640 for (fi = 0; fi < csv->
nfields; ++fi) {
1642 for (ti = 0; ti < csv->
ntc; ++ti) {
1649 if (ti != csv->
ntc)
continue;
1668 if (!map->
csv_name)
goto MEMORY_ERROR;
1674 if (!map->
cds_name)
goto MEMORY_ERROR;
1676 else if (csv_nvars <= cds_nvars) {
1680 map->
cds_name = strdup(cds_vars[mi]->name);
1681 if (!map->
cds_name)
goto MEMORY_ERROR;
1688 if (!map->
cds_name)
goto MEMORY_ERROR;
1696 if (!isalnum(*strp)) *strp =
'_';
1703 if (field_map->
units) {
1715 map->
csv_missings = calloc(nmissings + 1,
sizeof(
const char *));
1717 for (mvi = 0; mvi < nmissings; ++mvi) {
1732 for (fi = 0; fi < csv->
nfields; ++fi) {
1736 for (ti = 0; ti < csv->
ntc; ++ti) {
1742 if (ti != csv->
ntc)
continue;
1746 if (csv->
headers[fi] == NULL ||
1757 if (!map->
csv_name)
goto MEMORY_ERROR;
1761 if (csv_nvars <= cds_nvars) {
1765 map->
cds_name = strdup(cds_vars[mi]->name);
1766 if (!map->
cds_name)
goto MEMORY_ERROR;
1773 if (!map->
cds_name)
goto MEMORY_ERROR;
1781 if (!isalnum(*strp)) *strp =
'_';
1797 "Memory allocation error creating CSV2CDSMap structure\n");
1815 if (conf->
proc) free((
void *)conf->
proc);
1816 if (conf->
site) free((
void *)conf->
site);
1817 if (conf->
fac) free((
void *)conf->
fac);
1818 if (conf->
name) free((
void *)conf->
name);
1859 for (fi = 0; map[fi].
csv_name; ++fi) {
1861 free((
void *)map[fi].cds_name);
1862 free((
void *)map[fi].csv_name);
1863 free((
void *)map[fi].csv_units);
1865 if (map[fi].csv_missings) {
1868 free((
void *)map[fi].csv_missings[mvi]);
1871 free(map[fi].csv_missings);
1905 !(conf->
proc = strdup(proc)) ||
1906 !(conf->
site = strdup(site)) ||
1907 !(conf->
fac = strdup(fac)) ||
1908 !(conf->
name = strdup(name)) ||
1909 (level && !(conf->
level = strdup(level)))) {
1916 "Memory allocation error initializing CSV configuration structure\n");
1956 char path[PATH_MAX];
1957 char name[PATH_MAX];
1960 status = _csv_find_conf_file(conf, data_time, flags, path, name);
1961 if (status <= 0)
return(status);
1967 (strcmp(conf->
file_name, name) == 0) ) {
1974 if (!_csv_load_conf_file(conf, path, name, 0)) {
1986 if (!conf->
file_path)
goto MEMORY_ERROR;
1990 if (!conf->
file_name)
goto MEMORY_ERROR;
1997 "Memory allocation error initializing CSV configuration structure\n");
2022 fprintf(fp,
"CSV Configuration Structure\n\n");
2026 fprintf(fp,
"FILE_NAME_PATTERNS:\n\n");
2037 fprintf(fp,
"FILE_TIME_PATTERNS:\n\n");
2047 fprintf(fp,
"DELIMITER:\n\n '%c'\n\n", conf->
delim);
2051 fprintf(fp,
"HEADER_LINE:\n\n %s\n\n", conf->
header_line);
2055 fprintf(fp,
"HEADER_LINE_TAG:\n\n %s\n\n", conf->
header_tag);
2059 fprintf(fp,
"HEADER_LINE_NUMBER:\n\n %d\n\n", conf->
header_linenum);
2063 fprintf(fp,
"NUMBER_OF_HEADER_LINES:\n\n %d\n\n", conf->
header_nlines);
2067 fprintf(fp,
"NUMBER_OF_COLUMNS:\n\n %d\n\n", conf->
exp_ncols);
2072 fprintf(fp,
"TIME_COLUMN_PATTERNS:\n\n");
2078 fprintf(fp,
" %s:", time_col->
name);
2082 fprintf(fp,
" %s", time_col->
patterns[0]);
2084 for (j = 1; j < time_col->
npatterns; ++j) {
2085 fprintf(fp,
", %s", time_col->
patterns[j]);
2100 fprintf(fp,
"FIELD_MAP:\n\n");
2107 fprintf(fp,
" %s:", map->
out_name);
2115 fprintf(fp,
", %s", map->
units);
2118 fprintf(fp,
", %s", map->
missings[0]);
2122 fprintf(fp,
" \"%s", map->
missings[0]);
2125 fprintf(fp,
", %s", map->
missings[j]);