/*
 * _dsproc_compare_samples -- sampled excerpt.
 *
 * NOTE(review): the leading number on each line is the line number in the
 * original file; statements between the sampled lines are not visible
 * here, so the comments below describe only what the visible lines show.
 *
 * Visible behavior: walks the variables of dataset1, looks up a matching
 * var2, and returns 0 as soon as a difference is detected (missing var2,
 * fewer samples available in var2 than in var1, differing data type, or
 * differing sample bytes).  The success return value is not visible in
 * this excerpt.  Callers in this file pass
 * (dataset, start index, dataset, start index, count) and test the result
 * with '!'.
 */
68 int _dsproc_compare_samples(
/* Entered when at least one of the two time dimensions is missing; the
 * nested test is then true only when exactly one of them exists.
 * (Bodies not visible.) */
89 if (!time_dim1 || !time_dim2) {
90 if (time_dim1 || time_dim2) {
/* Loop over every variable in the first dataset. */
98 for (vi = 0; vi < dataset1->
nvars; vi++) {
103 var1 = dataset1->
vars[vi];
/* First operand of a longer condition (remainder not visible): tests
 * whether the variable's leading dimension is the time dimension. */
104 if ((var1->
dims[0] != time_dim1) ||
/* The "time" and "time_offset" variables are special-cased; presumably
 * skipped -- TODO confirm against the full source. */
112 if ((strcmp(var1->
name,
"time") == 0) ||
113 (strcmp(var1->
name,
"time_offset") == 0)) {
/* No counterpart variable: the samples are treated as different. */
121 if (!var2)
return(0);
126 if ((var2->
dims[0] != time_dim2) ||
/* Clamp both per-variable sample counts to the requested count. */
137 if (var1_count > count) var1_count = count;
140 if (var2_count > count) var2_count = count;
/* var2 has fewer samples available than var1: report a difference. */
142 if (var1_count > var2_count)
return(0);
/* Differing data types are reported as a difference. */
146 if (var1->
type != var2->
type)
return(0);
/* A zero sample size leaves nothing to compare for this variable. */
153 if (!sample_size)
continue;
/* Byte pointers to the first sample to compare in each variable;
 * nbytes is presumably the size in bytes of one sample -- TODO confirm. */
159 data1 = var1->
data.
bp + (start1 * nbytes);
160 data2 = var2->
data.
bp + (start2 * nbytes);
/* Raw byte-wise comparison of var1_count samples. */
162 if (memcmp(data1, data2, var1_count * nbytes) != 0)
return(0);
/*
 * _dsproc_delete_samples -- sampled excerpt.
 *
 * NOTE(review): the leading number on each line is the line number in the
 * original file; statements between the sampled lines are not visible.
 *
 * Callers in this file invoke it as
 * _dsproc_delete_samples(ntimes, times, filter_mask, dataset).
 * Visible behavior: for each variable of the dataset, samples are copied
 * from data2 to data1 when the two pointers differ; a second loop does the
 * analogous test on time1/time2; finally the time dimension length and
 * *ntimes are both set to nsamples.  This looks like in-place compaction
 * of the samples that are kept -- TODO confirm how filter_mask selects
 * them, since that code is not visible here.
 */
177 void _dsproc_delete_samples(
/* Loop over every variable in the dataset. */
196 for (vi = 0; vi < dataset->
nvars; vi++) {
201 var = dataset->
vars[vi];
/* First operand of a longer condition (remainder not visible). */
202 if ((var->
dims[0] != time_dim) ||
/* Nothing to move when nbytes is zero. */
211 if (nbytes == 0)
continue;
217 for (ti = 0; ti < nsamples; ti++) {
/* Copy only when source and destination differ (avoids a self-copy). */
224 if (data1 != data2) {
225 memcpy(data1, data2, nbytes);
/* Same pattern applied to the array of times. */
243 for (ti = 0; ti < *ntimes; ti++) {
247 if (time1 != time2) {
/* Publish the new sample count to both the dimension and the caller. */
258 time_dim->
length = *ntimes = nsamples;
/*
 * _dsproc_filter_duplicate_samples -- sampled excerpt.
 *
 * NOTE(review): the leading number on each line is the line number in the
 * original file; statements between the sampled lines are not visible,
 * so the comments describe only what the visible lines show.
 *
 * Visible behavior: scans times[1 .. *ntimes-1] for records whose time is
 * not after an earlier record, classifies the run as duplicates (ndups,
 * checked with _dsproc_compare_samples) or overlaps (noverlaps), logs the
 * finding, marks records in a calloc'ed filter_mask of *ntimes ints, and
 * finally removes them with _dsproc_delete_samples.  The return values are
 * not visible in this excerpt.
 *
 * Changes in this revision:
 *  - the duplicate-run loop now also bounds tjj by *ntimes, so times[tjj]
 *    is never read one past the end of the array when the repeated run
 *    reaches the last record;
 *  - total_filtered (size_t) is cast to int for the "%d" conversion, in
 *    line with the (int) casts used for the other size_t arguments.
 */
282 int _dsproc_filter_duplicate_samples(
289 char *errmsg = (
char *)NULL;
290 const char *status = (
char *)NULL;
291 int *filter_mask = (
int *)NULL;
292 int overlap_type = 0;
293 size_t noverlaps = 0;
295 size_t total_filtered = 0;
297 char ts1[32], ts2[32];
298 size_t mi, ti, tj, tii, tjj;
301 "%s: Checking for overlapping samples in dataset\n",
/* Compare each record with its predecessor, starting at the second one. */
307 for (tj = 1; tj < *ntimes; ++tj) {
313 if (
TV_LT(time1, time2)) {
/* Find the first earlier record whose time is >= time2. */
322 for (ti = 0; ti < tj; ++ti) {
323 if (
TV_GTEQ(times[ti], time2))
break;
/* Exact time match with an earlier record. */
329 if (
TV_EQ(times[ti], time2)) {
/* Extend the run of matching times; both indexes are bounded so the
 * comparison never reads past the end of times[]. */
334 for (tii = ti+1, tjj = tj+1; tii < tj && tjj < *ntimes; ++tii, ++tjj) {
335 if (
TV_NEQ(times[tii], times[tjj]))
break;
/* Advance tjj to the first record whose time is after time1. */
344 for (tjj = tj+1; tjj < *ntimes; ++tjj) {
345 if (
TV_GT(times[tjj], time1))
break;
348 noverlaps = tjj - tj;
361 "%s: Invalid time order found in dataset\n"
362 " -> '%s' < '%s': time of record %d < time of previous record\n",
363 dataset->
name, ts2, ts1, (
int)tj);
/* Times match: check whether the data values match as well. */
374 if (!_dsproc_compare_samples(dataset, tj, dataset, ti, ndups)) {
392 "%s: Overlapping records found in dataset\n"
393 " -> '%s': time of record %d = time of record %d\n",
394 dataset->
name, ts1, (
int)tj, (
int)ti);
402 "%s: Overlapping records found in dataset\n"
403 " -> '%s' to '%s': records %d to %d overlap records %d to %d\n",
405 ts1, ts2, (
int)tj, (
int)(tjj-1), (
int)ti, (
int)(tii-1));
/* First record to be filtered: log a header and allocate the mask. */
415 if (total_filtered == 0) {
424 "%s: Filtering overlapping records in dataset\n",
430 "%s: Filtering duplicate records in dataset\n",
434 filter_mask = (
int *)calloc(*ntimes,
sizeof(
int));
438 "Could not filter overlapping records from dataset: %s\n"
439 " -> memory allocation error\n",
/* Visit records tj .. tjj-1 (loop body not visible; presumably sets
 * their filter_mask entries -- TODO confirm). */
449 for (mi = tj; mi < tjj; ++mi) {
453 total_filtered += ndups + noverlaps;
464 " - '%s': record %d is identical to record %d\n",
465 ts1, (
int)tj, (
int)ti);
472 " - '%s' to '%s': records %d to %d are identical to records %d to %d\n",
473 ts1, ts2, (
int)tj, (
int)(tjj-1), (
int)ti, (
int)(tii-1));
477 else if (noverlaps) {
481 if (noverlaps == 1) {
483 if (overlap_type == 1) {
485 " - '%s': record %d overlaps previous records (invalid time order)\n",
490 " - '%s': record %d overlaps record %d (data values do not match)\n",
491 ts1, (
int)tj, (
int)ti);
498 if (overlap_type == 1) {
500 " - '%s' to '%s': records %d to %d overlap previous records (invalid time order)\n",
501 ts1, ts2, (
int)tj, (
int)(tjj-1));
505 " - '%s' to '%s': records %d to %d overlap records %d to %d (data values do not match)\n",
506 ts1, ts2, (
int)tj, (
int)(tjj-1), (
int)ti, (
int)(tii-1));
/* Apply the mask only if at least one record was flagged. */
515 if (total_filtered) {
523 " - filtering aborted\n\n%s", errmsg);
527 _dsproc_delete_samples(ntimes, times, filter_mask, dataset);
/* size_t must not be passed to "%d"; cast to match the conversion. */
530 " - total records filtered: %d\n", (int)total_filtered);
/*
 * _dsproc_filter_stored_samples -- sampled excerpt.
 *
 * NOTE(review): the leading number on each line is the line number in the
 * original file; statements between the sampled lines are not visible,
 * so the comments describe only what the visible lines show.
 *
 * Visible behavior: looks up previously stored files covering the time
 * range times[0] .. times[*ntimes-1], fetches the stored observations,
 * and compares the dataset's records against each observation's times.
 * Records whose time and data match are counted as duplicates (ndups);
 * others in the overlapping range are counted as overlaps (noverlaps).
 * Flagged records are marked in a calloc'ed filter_mask of *ntimes ints
 * and removed with _dsproc_delete_samples.  Returns 0 when the file
 * lookup reports an error (ndsfiles < 0) and 1 when no stored files are
 * found; other return paths are only partially visible.
 *
 * NOTE(review): times[*ntimes - 1] assumes *ntimes >= 1 -- confirm the
 * caller guarantees this.
 *
 * Change in this revision: total_filtered (size_t) is cast to int for the
 * "%d" conversion, in line with the (int) casts used elsewhere in this
 * function.
 */
571 int _dsproc_filter_stored_samples(
579 char *errmsg = (
char *)NULL;
580 const char *status = (
char *)NULL;
581 int *filter_mask = (
int *)NULL;
582 int found_overlap = 0;
583 int overlap_type = 0;
584 size_t noverlaps = 0;
586 size_t total_filtered = 0;
597 char ts1[32], ts2[32];
599 int mi, ti, tj, tii, tjj;
602 "%s: Checking For overlaps with previously stored data\n",
/* Find stored files that cover the time range of the new data. */
608 ndsfiles = _dsproc_find_dsfiles(
609 ds->dir, &(times[0]), &(times[*ntimes - 1]), &dsfiles);
/* Negative count: error.  Zero: nothing stored, nothing to filter. */
611 if (ndsfiles < 0)
return(0);
612 if (ndsfiles == 0)
return(1);
618 "Could not filter previously stored records from dataset: %s\n"
619 " -> memory allocation error\n",
/* Look up a stored time relative to the first new record; when the
 * call yields nothing and obs_ntimes is 0, fall back to times[0]. */
630 if (!_dsproc_fetch_timevals(ds,
631 ndsfiles, dsfiles, NULL, &(times[0]),
632 &obs_ntimes, &obs_start)) {
634 if (obs_ntimes != 0)
return(0);
635 obs_start = times[0];
/* Same lookup relative to the last new record, with the same fallback. */
639 if (!_dsproc_fetch_timevals(ds,
640 ndsfiles, dsfiles, &(times[*ntimes - 1]), NULL,
641 &obs_ntimes, &obs_end)) {
643 if (obs_ntimes != 0)
return(0);
644 obs_end = times[*ntimes - 1];
/* Alternative path (selecting condition not visible): use the range of
 * the new data directly. */
648 obs_start = times[0];
649 obs_end = times[*ntimes - 1];
/* Fetch the stored observations within [obs_start, obs_end]. */
652 nobs = _dsproc_fetch_dataset(
653 ndsfiles, dsfiles, &obs_start, &obs_end,
654 0, NULL, 0, fetched);
/* Process each fetched observation in turn. */
673 for (oi = 0; oi < nobs; oi++) {
675 obs = fetched->
groups[oi];
683 if (obs_ntimes != 0) {
693 obs_start = obs_times[0];
694 obs_end = obs_times[obs_ntimes - 1];
/* Walk the new records si .. ei (how si and ei are chosen is not
 * visible in this excerpt). */
726 for (ti = si; ti <= ei; ++ti) {
/* Advance tj to the first stored time that is not before ds_time. */
734 while (
TV_LT(obs_times[tj], ds_time) ) ++tj;
/* No stored record with exactly this time. */
738 if (
TV_NEQ(obs_times[tj], ds_time) ) {
/* Scan forward until a new record's time equals a stored time. */
752 for (tii = ti+1, tjj = tj;
753 tii <= ei && tjj < (int)obs_ntimes;
756 if (
TV_EQ(times[tii], obs_times[tjj]) )
break;
757 while (
TV_GT(times[tii], obs_times[tjj]) ) ++tjj;
760 noverlaps = tii - ti;
/* Times match: extend the run while the times keep matching. */
772 for (tii = ti+1, tjj = tj+1;
773 tii <= ei && tjj < (int)obs_ntimes;
776 if (
TV_NEQ(times[tii], obs_times[tjj]) )
break;
/* Times match: check whether the data values match as well. */
783 if (!_dsproc_compare_samples(dataset, ti, obs, tj, ndups)) {
/* First record to be filtered: log a header and allocate the mask. */
798 if (total_filtered == 0) {
805 "%s: Filtering data previously stored in file: %s\n",
808 filter_mask = (
int *)calloc(*ntimes,
sizeof(
int));
812 "Could not filter previously stored records from dataset: %s\n"
813 " -> memory allocation error\n",
/* Visit records ti .. tii-1 (loop body not visible; presumably sets
 * their filter_mask entries -- TODO confirm). */
825 for (mi = ti; mi < tii; ++mi) {
829 total_filtered += ndups + noverlaps;
840 " - '%s': duplicate record %d\n",
849 " - '%s' to '%s': duplicate records %d to %d\n",
850 ts1, ts2, (
int)ti, (
int)(tii-1));
853 else if (noverlaps) {
855 if (noverlaps == 1) {
859 if (overlap_type == 1) {
861 " - '%s': overlapping record %d (times do not match)\n",
866 " - '%s': overlapping record %d (data values do not match)\n",
875 if (overlap_type == 1) {
877 " - '%s' to '%s': overlapping records %d to %d (times do not match)\n",
878 ts1, ts2, (
int)ti, (
int)(tii-1));
882 " - '%s' to '%s': overlapping records %d to %d (data values do not match)\n",
883 ts1, ts2, (
int)ti, (
int)(tii-1));
/* Stop once either the new records or the stored times are exhausted. */
888 if ((tii > ei) || (tjj == (
int)obs_ntimes))
break;
897 if (found_overlap)
break;
914 "%s: Overlapping records found with previously stored data\n"
915 " -> '%s': record %d overlaps data in: %s\n",
/* Two range messages follow, differing only in the order in which the
 * si/ei indexes are printed (selecting conditions not visible). */
924 "%s: Overlapping records found with previously stored data\n"
925 " -> '%s' to '%s': records %d to %d overlap data in: %s\n",
926 dataset->
name, ts1, ts2, ei, si, obs->
name);
934 "%s: Overlapping records found with previously stored data\n"
935 " -> '%s' to '%s': records %d to %d overlap data in: %s\n",
936 dataset->
name, ts1, ts2, si, ei, obs->
name);
/* Apply the mask only if at least one record was flagged. */
942 if (total_filtered) {
950 " - filtering aborted\n\n%s", errmsg);
954 _dsproc_delete_samples(ntimes, times, filter_mask, dataset);
/* size_t must not be passed to "%d"; cast to match the conversion. */
957 " - total records filtered: %d\n", (int)total_filtered);
/* Second pass over the fetched observations; judging by the messages it
 * only reports overlaps -- TODO confirm against the full source. */
981 for (oi = 0; oi < nobs; oi++) {
983 obs = fetched->
groups[oi];
/* Skip observations with no times. */
988 if (!obs_ntimes)
continue;
/* A negative index means no overlapping record was found on that side
 * (presumably -- the lookups that set si/ei are not visible). */
994 if (si < 0)
continue;
997 if (ei < 0)
continue;
1006 "%s: Overlapping records found with previously stored data\n"
1007 " -> '%s': record %d overlaps data in: %s\n",
1008 dataset->
name, ts1, si, obs->
name);
1016 "%s: Overlapping records found with previously stored data\n"
1017 " -> '%s' to '%s': records %d to %d overlap data in: %s\n",
1018 dataset->
name, ts1, ts2, ei, si, obs->
name);
1026 "%s: Overlapping records found with previously stored data\n"
1027 " -> '%s' to '%s': records %d to %d overlap data in: %s\n",
1028 dataset->
name, ts1, ts2, si, ei, obs->
name);
/*
 * NOTE(review): sampled excerpt of a function whose header is not visible
 * here; the leading numbers are line numbers in the original file.
 * Visible behavior: obtains a list of missing values for a variable and,
 * for float and double data, tests each value with isfinite().  The
 * replacement statement itself is not visible; presumably the non-finite
 * value is replaced by 'missing' -- TODO confirm.
 */
1088 missings.
vp = (
void *)NULL;
/* Zero or negative count is returned unchanged to the caller. */
1091 if (nmissings <= 0)
return(nmissings);
/* float data: the first entry of missings is used as the missing value. */
1110 float *datap = var->
data.
fp;
1111 float missing = *(missings.
fp);
/* True for NaN and +/-Inf. */
1115 if (!isfinite(*datap)) {
/* double data: same pattern as the float case. */
1125 double *datap = var->
data.
dp;
1126 double missing = *(missings.
dp);
1130 if (!isfinite(*datap)) {
/*
 * NOTE(review): sampled excerpt of a function whose header is not visible
 * here; the leading numbers are line numbers in the original file.
 * Visible behavior: iterates over the dataset's variables, treats a
 * negative found_nans as a separate case (body not visible; presumably an
 * error -- TODO confirm), logs the per-variable replacement count, and
 * accumulates the counts into total_nans.
 */
1174 "%s: Checking for Nan/Inf values in dataset\n",
/* Loop over every variable in the dataset. */
1179 for (vi = 0; vi < dataset->
nvars; ++vi) {
1181 var = dataset->
vars[vi];
1201 if (found_nans < 0) {
1216 "%s: Replacing NaN/Inf values with missing values\n",
1221 " - %s: replaced %d NaN/Inf values\n",
1222 var->
name, found_nans);
/* Running total across all variables, reported once at the end. */
1225 total_nans += found_nans;
1235 " - total NaN/Inf values replaced: %d\n", total_nans);