* val_name,
200 const char* errformat =
"Specified %s character does not match NEXUS" 201 " comment in file (specified %s, comment %c)";
210eip->
message= (
char*)
malloc(strlen (errformat) + strlen (val_name)
215errfunc (eip, errdata);
235 const char* err_format =
236 "%d bad characters (%c) found at position %d (%s).";
238 if(errfunc ==
NULL|| num_bad == 0 || bad_char == 0
239|| reason ==
NULL) {
252+ strlen (reason) + 3);
255sprintf (eip->
message, err_format, num_bad, bad_char,
offset, reason);
257errfunc (eip, errdata);
269 void* report_error_userdata)
273 if(report_error ==
NULL) {
284report_error (eip, report_error_userdata);
296 void* report_error_userdata)
300 if(report_error ==
NULL) {
310eip->
message=
strdup(
"Inconsistent block line formatting");
311report_error (eip, report_error_userdata);
324 void* report_error_userdata)
328 const char*
format=
"Expected line length %d, actual length %d";
331 if(lip ==
NULL|| report_error ==
NULL) {
352report_error(eip, report_error_userdata);
366 void* report_error_userdata)
369 const char* err_format =
"Expected %d lines in block, found %d";
371 if(report_error ==
NULL) {
384sprintf (eip->
message, err_format, expected_num, actual_num);
386report_error (eip, report_error_userdata);
398 void* report_error_userdata)
401 const char* err_format =
"Duplicate ID! Sequences will be concatenated!";
403 if(report_error ==
NULL) {
415report_error (eip, report_error_userdata);
426 void* report_error_userdata)
430 if(report_error ==
NULL) {
440report_error (eip, report_error_userdata);
454 void* report_error_userdata)
457 const char* format_str =
"Expected sequence length %d, actual length %d";
459 if(report_error ==
NULL) {
470sprintf (eip->
message, format_str, expected_length, actual_length);
472report_error (eip, report_error_userdata);
484 void* report_error_userdata)
487 const char* err_format =
"Expected %d sequences, found %d";
489 if(report_error ==
NULL) {
501sprintf (eip->
message, err_format, num_expected, num_found);
503report_error (eip, report_error_userdata);
512 void* report_error_userdata)
515 const char* err_format =
"Expected sequences of length %d, found %d";
517 if(report_error ==
NULL) {
528sprintf (eip->
message, err_format, len_expected, len_found);
530report_error (eip, report_error_userdata);
540 void* report_error_userdata)
544 if(report_error ==
NULL) {
554report_error (eip, report_error_userdata);
565 void* report_error_userdata)
568 const char* err_format =
"ID %s appears in the following locations:";
594line_number !=
NULL;
595line_number = line_number->
next) {
596sprintf(cp,
" %d", line_number->
ival);
600report_error(eip, report_error_userdata);
613 const char*
msg=
"This is an ASN.1 file, " 614 "which cannot be read by this function.";
616 if(errfunc ==
NULL) {
627errfunc (eip, errdata);
642 const char*
msg=
"This file contains sequences in brackets (indicating " 643 "a segmented alignment) as well as sequences not in brackets at lines " 644 "%s. Please either add or remove brackets to correct this problem.";
645 size_tnum_lines = 0;
648 char* line_text_list;
649 char* line_text_list_offset;
651 if(errfunc ==
NULL|| offset_list ==
NULL) {
654 for(
t= offset_list;
t!=
NULL;
t=
t->next) {
662line_text_list = (
char*)
malloc(msg_len);
663 if(line_text_list ==
NULL)
return;
664line_text_list_offset = line_text_list;
666 for(
t= offset_list;
t!=
NULL;
t=
t->next) {
667 if(
t->next ==
NULL)
669sprintf (line_text_list_offset,
"%d",
t->ival);
671 else if(num_lines == 2)
673sprintf (line_text_list_offset,
"%d and ",
t->ival);
675 else if(
t->next->next ==
NULL)
677sprintf (line_text_list_offset,
"%d, and ",
t->ival);
681sprintf (line_text_list_offset,
"%d, ",
t->ival);
683line_text_list_offset += strlen (line_text_list_offset);
686msg_len += strlen(
msg) + 1;
693sprintf(eip->
message,
msg, line_text_list);
695errfunc(eip, errdata);
697 free(line_text_list);
710 const char*
msg=
"This line may contain an improperly formatted organism description.\n" 711 "Organism descriptions should be of the form [org=tax name] or [organism=tax name].\n";
713 if(errfunc ==
NULL|| linestring ==
NULL) {
720eip->
message= (
char*)
malloc(strlen (
msg) + strlen (linestring) + 1);
725errfunc (eip, errdata);
741 const char*
msg=
"This segmented set contains a different number of segments (%d) than expected (%d).\n";
743 if(errfunc ==
NULL) {
753sprintf (eip->
message,
msg, num_seg, num_seg_exp);
755errfunc (eip, errdata);
804(
intline_num_start,
811 const char* errformat1 =
"Line %d could not be assigned to an interleaved block";
812 const char* errformat2 =
"Lines %d through %d could not be assigned to an interleaved block";
813 const char* errformat3 =
"Contents of unused line: %s";
816 if(errfunc ==
NULL|| line_val ==
NULL) {
824 if(line_num_start == line_num_stop) {
827sprintf (eip->
message, errformat1, line_num_start);
832sprintf (eip->
message, errformat2, line_num_start,
836errfunc (eip, errdata);
839 for(skip = line_num_start;
840skip < line_num_stop + 1 && line_val !=
NULL;
850+ strlen (line_val->
data) + 1);
852sprintf (eip->
message, errformat3, line_val->
data);
854errfunc (eip, errdata);
856line_val = line_val->
next;
1034 if(list ==
NULL) {
1061 if(best ==
NULL) {
1136 if(
last==
NULL||
last->size_value != size_value) {
1150 last->num_appearances ++;
1176sip1 = sip1->
next, sip2 = sip2->
next) {
1200 if(list ==
NULL) {
1205prev_llp = prev_llp->
next;
1211prev_llp->
next= llp;
1229(
const char*
string,
1257 while(lip !=
NULL) {
1258next_lip = lip->
next;
1277 while(lip !=
NULL) {
1278nextlip = lip->
next;
1305 const char*
string,
1311 if(
string==
NULL) {
1318 if(list ==
NULL) {
1333 const char*
string,
1340 if(comment ==
NULL) {
1346 if(list !=
NULL) {
1348list = list->
next;
1350list->
next= comment;
1359 if(list ==
NULL) {
1370 const char*
string,
1374 if(comment ==
NULL) {
1385 intnum_segments = 0;
1386 EBoolskipped_line_since_last_defline =
eTrue;
1394 if(lip->
data[0] !=
'['|| strspn(lip->
data+ 1,
" \t\r\n") != strlen (lip->
data+ 1))
1401 if(lip->
data[0] ==
'>')
1403 if(!skipped_line_since_last_defline)
1410skipped_line_since_last_defline =
eFalse;
1415skipped_line_since_last_defline =
eTrue;
1423(lip->
data[0] !=
']'|| strspn (lip->
data+ 1,
" \t\r\n") != strlen (lip->
data+ 1)))
1428 returnnum_segments;
1444 intnum_segments = 1;
1445 intnum_segments_this_bracket;
1446 intnum_segments_expected;
1449 if(comment_list ==
NULL)
1451 returnnum_segments;
1454 for(comment = comment_list; comment !=
NULL; comment = comment->
next)
1458num_segments_this_bracket,
1460 if(comment != comment_list && segcount_list->
next!=
NULL)
1465 if(num_segments_expected != num_segments_this_bracket)
1468num_segments_this_bracket, num_segments_expected,
1478 returnnum_segments;
1490 if(comment_list ==
NULL)
1495 for(comment = comment_list; comment !=
NULL; comment = comment->
next)
1506 if(offset_list ==
NULL) offset_list = new_offset;
1554first_token =
NULL;
1556 for(lip = line_list; lip !=
NULL; lip = lip->
next) {
1559 while(piece !=
NULL) {
1560line_pos = piece -
tmp;
1587 if(list ==
NULL) {
1591 for(lip = list; lip !=
NULL; lip = lip->
next) {
1601 for(lip = list; lip !=
NULL; lip = lip->
next) {
1603cp_from = lip->
data;
1604 while(*cp_from != 0) {
1605 if(!
isspace((
unsigned char)*cp_from)) {
1667 if(lirp ==
NULL) {
1700 if(line_list ==
NULL) {
1704 if(lirp ==
NULL) {
1759 if(lirp ==
NULL) {
1830 if(new_item ==
NULL) {
1843 last->next = new_item;
1855 if(list ==
NULL) {
1880 if(
string==
NULL) {
1883add_to = add_to->
next) {
1891add_to = add_to->
next) {
1896 if(add_to ==
NULL) {
1898 if(list ==
NULL) list = add_to;
1899 if(add_to !=
NULL) {
1903 if(add_to !=
NULL) {
1923 intchar_count, diff;
1925 if(str1 ==
NULL&& str2 ==
NULL) {
1928 if(str1 ==
NULL) {
1931 if(str2 ==
NULL) {
1937 while(*cp1 != 0 && *cp2 != 0 && char_count < cmp_count) {
1938diff =
toupper((
unsigned char)(*cp1)) -
toupper((
unsigned char)(*cp2));
1946 if(char_count == cmp_count) {
1948}
else if(*cp1 == 0 && *cp2 != 0) {
1950}
else if(*cp1 != 0 && *cp2 == 0) {
1967 if(str1 ==
NULL&& str2 ==
NULL) {
1970 if(str1 ==
NULL) {
1973 if(str2 ==
NULL) {
1978 while(*cp1 != 0 && *cp2 != 0) {
1979diff =
toupper((
unsigned char) *cp1) -
toupper((
unsigned char) *cp2);
1986 if(*cp1 == 0 && *cp2 != 0) {
1988}
else if(*cp1 != 0 && *cp2 == 0) {
2020 while(!
isdigit((
unsigned char)*cp) && *cp != 0) {
2025 while(
isdigit((
unsigned char)*cpend) && *cpend != 0) {
2037 while(!
isdigit((
unsigned char)*cp) && *cp != 0) {
2042 while(
isdigit((
unsigned char)*cpend) && *cpend != 0) {
2053 if(
first> 0 && second > 0) {
2078 if(!
isdigit((
unsigned char)*cp) && !
isspace((
unsigned char)*cp)) {
2081 if(! found_first_number) {
2082 if(!
isdigit((
unsigned char)*cp)) {
2085found_first_number =
eTrue;
2086}
else if(! found_dividing_space) {
2087 if(
isspace((
unsigned char) *cp)) {
2088found_dividing_space =
eTrue;
2089}
else if( !
isdigit((
unsigned char)*cp)) {
2092}
else if(! found_second_number) {
2093 if(
isdigit((
unsigned char)*cp)) {
2094found_second_number =
eTrue;
2095}
else if(!
isspace((
unsigned char) *cp)) {
2098}
else if(! found_second_number_end) {
2099 if(
isspace((
unsigned char) *cp)) {
2100found_second_number_end =
eTrue;
2101}
else if(!
isdigit((
unsigned char)*cp)) {
2104}
else if(!
isspace((
unsigned char) *cp)) {
2109 if(found_second_number) {
2124(
const char*
str,
2125 const char* valname,
2137cpstart = (
char*) strstr (
str, valname);
2138 if(cpstart ==
NULL) {
2141cpstart += strlen (valname);
2142 while(*cpstart != 0 &&
isspace((
unsigned char)*cpstart)) {
2145 if(*cpstart !=
'=') {
2149 while(*cpstart != 0 &&
isspace((
unsigned char)*cpstart)) {
2153 if(!
isdigit((
unsigned char)*cpstart)) {
2156cpend = cpstart + 1;
2157 while( *cpend != 0 &&
isdigit((
unsigned char)*cpend)) {
2160maxlen = cpend - cpstart;
2164strncpy(
buf, cpstart, maxlen);
2178(
const char*
str,
2180 EBool* found_nchar,
2187|| found_ntax ==
NULL|| afrp ==
NULL) {
2190 if(! *found_ntax &&
2195*found_ntax =
eTrue;
2197 if(! *found_nchar &&
2202*found_nchar =
eTrue;
2221cpend = strstr (
str,
";");
2222 if(cpend ==
NULL) {
2225cp = strstr (
str, val_name);
2226 if(cp ==
NULL|| cp > cpend) {
2229cp += strlen (val_name);
2230 while(
isspace((
unsigned char)*cp)) {
2237 while(
isspace((
unsigned char)*cp) || *cp ==
'\'') {
2251(
const char*
str,
2263cp = strstr (
str,
"format ");
2265cp = strstr (
str,
"FORMAT ");
2271 if(errfunc ==
NULL) {
2301 if(c != 0 && sequence_info->
match!=
NULL 2302&& strchr (sequence_info->
match, c) ==
NULL)
2315|| *(
str+ 1) != 0)
2321 str= (
char*)
malloc(2 *
sizeof(
char));
2336(
const char*
str,
2346cp = (
char*) strstr (
str,
"format ");
2348cp = (
char*) strstr (
str,
"FORMAT ");
2386|| strspn (
str,
"*:. \t\r\n") < strlen (
str)
2387|| (strchr (
str,
'*') ==
NULL 2388&& strchr (
str,
':') ==
NULL 2389&& strchr (
str,
'.') ==
NULL)) {
2403 char* last_semicolon;
2408last_semicolon = strrchr (
str,
';');
2409 if(last_semicolon ==
NULL 2410|| strspn (last_semicolon + 1,
" \t\r") != strlen (last_semicolon + 1)
2411|| strchr (
str,
';') != last_semicolon) {
2433 while(*
str!= 0) {
2458||
str[0] ==
';') {
2493 len= strspn (
str,
" \t\r");
2494 if(
len== strlen (
str)) {
2507 if(linestring ==
NULL) {
2523 if(linestring !=
NULL&& strstr (linestring,
"::=") !=
NULL) {
2558 if(
string==
NULL) {
2561cp_start = strstr (
string,
"[");
2562 if(cp_start !=
NULL) {
2563cp_end = strstr (cp_start,
"]");
2564 if(cp_end !=
NULL) {
2569clp->
start= cp_start;
2585 if(linestring ==
NULL) {
2590 while(clp !=
NULL) {
2591strcpy (clp->
start, clp->
end+ 1);
2600 if( linestring [0] ==
'>') {
2605 if(linestring[
offset] == 0) {
2611 if(strspn (linestring,
" \t\r") == strlen (linestring)) {
2636 len= strspn ( clp->
start,
" \t\r");
2638cp_end = strstr (cp,
"=");
2639 if(cp_end ==
NULL) {
2643 while(cp_end > cp &&
isspace((
unsigned char)*cp_end)) {
2647 if((cp_end - cp == 3 &&
s_StringNICmp(cp,
"org", 3) == 0)
2648|| (cp_end - cp == 8 &&
s_StringNICmp(cp,
"organism", 8) == 0)) {
2664 if(
string==
NULL) {
2670 char* pos = clp->
end;
2680 while(next_clp !=
NULL&&
2683clp->
end= next_clp->
end;
2696 charpbuf1024[1024];
2700 const char* to = clp->
start;
2701 const char* from = clp->
end+ 1;
2702 size_tdiff = from - to;
2703 size_t len= strlen(from);
2704 if(diff <
len-1) {
2705 char* pbuf = pbuf1024;
2706 if(
len>
sizeof(pbuf1024)-1) {
2709strcpy(pbuf, clp->
end+ 1);
2710strcpy(clp->
start, pbuf);
2711 if(pbuf != pbuf1024) {
2716strcpy (clp->
start, clp->
end+ 1);
2732 intnext_len, this_len,
len;
2734 if(org_clp ==
NULL) {
2744ordered_start =
NULL;
2746 if(ordered_start ==
NULL) {
2753next_clp = ordered_start;
2754next_len = next_clp->
end- next_clp->
start;
2755this_len = clp->
end- clp->
start;
2756 len= next_len > this_len ? next_len : this_len;
2757 while(next_clp !=
NULL 2760prev_clp = next_clp;
2761next_clp = next_clp->
next;
2762 if(next_clp !=
NULL) {
2763next_len = next_clp->
end- next_clp->
start;
2764 len= next_len > this_len ? next_len : this_len;
2767 if(prev_clp ==
NULL) {
2768clp->
next= ordered_start;
2769ordered_start = clp;
2772prev_clp->
next= clp;
2776clp_list->
next= ordered_start;
2787 char* ordered_org_name;
2790 if(org_clp ==
NULL) {
2794ordered_org_name = (
char*)
malloc(org_clp->
end- org_clp->
start+ 2);
2795 if(ordered_org_name ==
NULL) {
2798ordered_org_name [0] = 0;
2800cp = ordered_org_name;
2801 for(clp = clp_list; clp !=
NULL; clp = clp->
next) {
2809 returnordered_org_name;
2819 intorg_num, defline_num, new_len;
2820 char*empty_defline, *new_defline;
2822 if(afrp ==
NULL|| defline ==
NULL) {
2830 while(lip !=
NULL)
2838 while(lip !=
NULL&& defline_num < org_num) {
2843 if(defline_num == org_num && lip !=
NULL) {
2845 if(strlen (lip->
data) == 0)
2848lip->
data= defline;
2853new_len = strlen (lip->
data) + strlen (defline) + 2;
2854new_defline = (
char*)
malloc(new_len *
sizeof(
char));
2855 if(new_defline !=
NULL)
2857strcpy (new_defline, lip->
data);
2858 strcat(new_defline,
" ");
2859 strcat(new_defline, defline);
2861lip->
data= new_defline;
2874 while(defline_num < org_num - 1)
2876empty_defline = (
char*)
malloc(
sizeof(
char));
2877 if(empty_defline !=
NULL)
2909 if(
string==
NULL||
string[0] !=
'>'|| afrp ==
NULL) {
2914 if(clp ==
NULL&& (strstr (
string,
"org=") !=
NULL|| strstr (
string,
"organism=") !=
NULL))
2924 const intlinelen = strlen(
string);
2931 while(clp !=
NULL) {
2935clp->
start-
string);
2940 if(*clp->
end!= 0) {
2942cp += strspn (cp,
" \t\r\n");
2944defline = clp->
end+ 1;
2945defline_offset = clp->
end-
string+ 1;
2950comment_end = clp->
end;
2972 if(arsp ==
NULL) {
2996 if(arsp ==
NULL) {
3018 for(arsp = list; arsp !=
NULL; arsp = arsp->
next) {
3019 if(
strcmp(arsp->
id,
id) == 0) {
3040 if(
strcmp(arsp->
id,
id) == 0) {
3063arsp = arsp->
next;
3086 intdata_line_offset)
3092 if(arsp ==
NULL) {
3094 if(arsp ==
NULL) {
3097 if(list ==
NULL) list = arsp;
3120 intdata_line_offset)
3126 for(arsp = list; arsp !=
NULL&& curr < index; arsp = arsp->
next) {
3129 if(arsp ==
NULL) {
3145 if(afrp ==
NULL) {
3168 if(afrp ==
NULL) {
3217 if(this_pattern ==
NULL) {
3223 len= strcspn (cp,
" \t\r");
3226cp += strspn (cp,
" \t\r");
3228 returnthis_pattern;
3246line_counter = line_start;
3247 if(next_offset !=
NULL 3248&& next_offset->
ival- line_counter < block_size) {
3252 for(llp = pattern_list;
3254&& (next_offset ==
NULL|| line_counter < next_offset->
ival- 1)
3255&& line_counter - line_start < block_size;
3263 if(line_counter - line_start == block_size) {
3292prev_offset =
NULL;
3293next_offset = offset_list;
3296 while(llp !=
NULL) {
3297 if(next_offset !=
NULL&& line_counter == next_offset->
ival) {
3298prev_offset = next_offset;
3299next_offset = next_offset->
next;
3301 while(line_counter - prev_offset->
ival< block_size
3311 if(forecast_pos > 0) {
3313 if(new_offset ==
NULL) {
3316 if(prev_offset ==
NULL) {
3317new_offset->
next= offset_list;
3318offset_list = new_offset;
3320new_offset->
next= next_offset;
3321prev_offset->
next= new_offset;
3323prev_offset = new_offset;
3325 while(line_counter - prev_offset->
ival< block_size
3353 intblock_line_counter;
3358 if(pattern_list ==
NULL|| afrp ==
NULL 3368 while(llp !=
NULL&& line_val !=
NULL) {
3369 while(llp !=
NULL&& line_val !=
NULL 3370&& (
offset==
NULL|| line_counter < offset->ival)) {
3385line_val = line_val->
next;
3389block_line_counter = 0;
3390 while(block_line_counter < afrp->block_size && llp !=
NULL) {
3396line_val = line_val->
next;
3425 for(llp = pattern_list; llp !=
NULL; llp = llp->
next) {
3426llp_next = llp->
next;
3432 if(size_list ==
NULL) {
3436 if(best_ptr !=
NULL 3441 for(llp = pattern_list; llp !=
NULL; llp = llp->
next) {
3442llp_next = llp->
next;
3447 if(new_offset ==
NULL) {
3474 if(ppline ==
NULL|| *ppline ==
NULL) {
3477 len= strlen (*ppline);
3478ptmp = *ppline +
len- 1;
3479 while(ptmp > *ppline && (*ptmp ==
' '|| *ptmp ==
'\t'|| *ptmp ==
'\r'|| *ptmp ==
'\n'))
3484 len= strspn (*ppline,
" \t\r\n");
3498 intoverall_line_count = 0;
3500 char* linestring = readfunc (pfile);
3508 while(linestring !=
NULL&& linestring [0] != EOF) {
3513 if(in_taxa_comment) {
3514 if(
strncmp(linestring,
"end;", 4) == 0) {
3515in_taxa_comment =
eFalse;
3518}
else if(
strncmp(linestring,
"begin taxa;", 11) == 0) {
3520in_taxa_comment =
eTrue;
3523next_line =
s_LineInfoNew(linestring, overall_line_count, 0);
3524 if(last_line ==
NULL) {
3527last_line->
next= next_line;
3529last_line = next_line;
3532linestring = readfunc (pfile);
3533overall_line_count ++;
3542 EBool* last_line_was_marked_id,
3544 intoverall_line_count)
3558 if(last_pattern !=
NULL) {
3559 while(last_pattern->
next!=
NULL) {
3560last_pattern = last_pattern->
next;
3566 if(linestr [0] ==
'>') {
3571 if(*last_line_was_marked_id)
3584*last_line_was_marked_id =
eTrue;
3590*last_line_was_marked_id =
eFalse;
3592 len= strcspn (linestr,
" \t\r");
3594cp = linestr +
len;
3595 len= strspn (cp,
" \t\r");
3608 if(last_pattern ==
NULL) {
3614last_pattern->
next= this_pattern;
3623 EBooluse_nexus_file_info,
3630 intoverall_line_count;
3644 if(readfunc ==
NULL|| sequence_info ==
NULL) {
3649 if(afrp ==
NULL) {
3662 for(next_line = afrp->
line_list; next_line !=
NULL; next_line = next_line->
next) {
3663linestring = next_line->
data;
3664overall_line_count = next_line->
line_num-1;
3668 s_AfrpProcessFastaGap(afrp, & pattern_list, & last_line_was_marked_id, linestring, overall_line_count);
3677 if(! found_expected_ntax || ! found_expected_nchar) {
3680found_expected_ntax =
eTrue;
3681found_expected_nchar =
eTrue;
3685&found_expected_nchar, afrp);
3688 if(! found_char_comment) {
3689 if(use_nexus_file_info) {
3702 if(in_bracketed_comment) {
3703 len= strspn (linestring,
" \t\r\n");
3704 if(last_comment !=
NULL)
3707overall_line_count,
len);
3709 if(strchr (linestring,
']') !=
NULL) {
3710in_bracketed_comment =
eFalse;
3713}
else if(linestring [0] ==
'['&& strchr (linestring,
']') ==
NULL) {
3714in_bracketed_comment =
eTrue;
3715 len= strspn (linestring,
" \t\r\n");
3718overall_line_count,
len);
3719 if(comment_list ==
NULL)
3721comment_list = last_comment;
3735 if(linestring[0] == 0) {
3736last_line_was_marked_id =
eFalse;
3738 if(pattern_list ==
NULL) {
3739pattern_list = this_pattern;
3740last_pattern = this_pattern;
3742last_pattern->
next= this_pattern;
3743last_pattern = this_pattern;
3750 if(linestring [0] ==
'>') {
3755 if(last_line_was_marked_id)
3763 s_AfrpProcessFastaGap(afrp, & pattern_list, & last_line_was_marked_id, linestring, overall_line_count);
3769last_line_was_marked_id =
eTrue;
3775last_line_was_marked_id =
eFalse;
3777 len= strcspn (linestring,
" \t\r");
3779cp = linestring +
len;
3780 len= strspn (cp,
" \t\r");
3793 if(pattern_list ==
NULL) {
3794pattern_list = this_pattern;
3795last_pattern = this_pattern;
3800last_pattern->
next= this_pattern;
3801last_pattern = this_pattern;
3841 intnum_lines_in_block)
3854 for(lip = first_line, block_offset = 0;
3855lip !=
NULL&& block_offset < num_lines_in_block;
3856lip = lip->
next, block_offset++)
3858linestring = lip->
data;
3859 if(linestring !=
NULL) {
3860 len= strcspn (linestring,
" \t\r");
3861 if(
len> 0 &&
len< strlen (linestring)) {
3863 if(this_id ==
NULL) {
3866strncpy (this_id, linestring,
len);
3867this_id [
len] = 0;
3870 if(arsp !=
NULL) {
3889 intnum_lines_in_block,
3895 int len, block_offset, id_offset;
3902 for(lip = first_line, block_offset = 0;
3903lip !=
NULL&& block_offset < num_lines_in_block;
3904lip = lip->
next, block_offset ++)
3908 len= strcspn (cp,
" \t\r");
3909 if(first_block &&
len== strlen (cp)) {
3916tmp_id = (
char*)
malloc( (
len+ 1) *
sizeof(char));
3917 if(tmp_id ==
NULL) {
3920strncpy (tmp_id, cp,
len);
3923 if(id_offset != block_offset && ! first_block) {
3931cp += strspn (cp,
" \t\r");
3939 for(this_pattern = list;
3940this_pattern !=
NULL;
3941this_pattern = this_pattern->
next)
3947best = this_pattern;
3952 for(lip = first_line, block_offset = 0;
3953lip !=
NULL&& block_offset < num_lines_in_block;
3954lip = lip->
next, block_offset ++)
3958 len= strcspn (cp,
" \t\r");
3959 if(first_block &&
len== strlen (cp)) {
3966tmp_id = (
char*)
malloc( (
len+ 1) *
sizeof(char));
3967 if(tmp_id ==
NULL) {
3970strncpy (tmp_id, cp,
len);
3973cp += strspn (cp,
" \t\r");
4001 intnum_lines_in_block,
4010 EBoolthis_block_has_ids;
4016 for(lip = lines, line_number = 0;
4017lip !=
NULL&& line_number < num_lines_in_block;
4018lip = lip->
next, line_number ++)
4020linestring = lip->
data;
4021 if(linestring !=
NULL) {
4022 if(this_block_has_ids) {
4023 len= strcspn (linestring,
" \t\r");
4024 if(first_block &&
len== strlen (linestring)) {
4032 if(this_id ==
NULL) {
4035strncpy (this_id, linestring,
len);
4036this_id [
len] = 0;
4037cp = linestring +
len;
4038 len= strspn (cp,
" \t\r");
4081 intnum_lines_in_block)
4086 for(lip = first_line, block_offset = 0;
4087lip !=
NULL&& block_offset < num_lines_in_block;
4106 if(afrp ==
NULL) {
4113 while(lip !=
NULL&& offset_ptr !=
NULL 4115 if(in_taxa_comment) {
4117in_taxa_comment =
eFalse;
4120&&
strncmp(lip->
data,
"begin taxa;", 11) == 0) {
4121in_taxa_comment =
eTrue;
4123 if(line_counter == offset_ptr->
ival) {
4127offset_ptr = offset_ptr->
next;
4143 EBoolgen_local_ids)
4150 intpattern_line_counter;
4153 static intnext_local_id = 1;
4155 if(token_list ==
NULL|| offset_list ==
NULL 4156|| anchorpattern ==
NULL 4161 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
4163 if(anchorpattern [curr_seg] ==
NULL|| anchorpattern [curr_seg]->lengthrepeats ==
NULL)
4173 for(offset_ptr = offset_list;
4174offset_ptr !=
NULL&& lip !=
NULL;
4175offset_ptr = offset_ptr->
next)
4177next_offset_ptr = offset_ptr->
next;
4178 while(line_counter < offset_ptr->ival - 1 && lip !=
NULL) {
4183 if(gen_local_ids) {
4184 char* replacement_id = (
char*)
malloc(32 +strlen(lip->
data));
4185sprintf(replacement_id,
"lcl|%d %s", next_local_id++, lip->
data+1);
4187lip->
data= replacement_id;
4189curr_id = lip->
data;
4192 for(sip = anchorpattern[curr_seg]->lengthrepeats;
4195&& (next_offset_ptr ==
NULL 4196|| line_counter < next_offset_ptr->ival - 1);
4199 for(pattern_line_counter = 0;
4200pattern_line_counter < sip->num_appearances
4202&& (next_offset_ptr ==
NULL 4203|| line_counter < next_offset_ptr->ival - 1);
4204pattern_line_counter ++)
4206 if(lip->
data[0] !=
']'&& lip->
data[0] !=
'[') {
4207 if((
int) strlen (lip->
data) != sip->size_value) {
4227line_counter - offset_ptr->
ival,
4276 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
4278list[curr_seg] =
NULL;
4287 for(curr_seg = 0; curr_seg < afrp->
num_segments+ 1; curr_seg ++)
4289best[curr_seg] =
NULL;
4293this_pattern =
NULL;
4300 if(lip->
data==
NULL)
continue;
4301 if(lip->
data[0] ==
']'|| lip->
data[0] ==
'[')
continue;
4302 if(lip->
data[0] ==
'>') {
4303 if(this_pattern !=
NULL) {
4312 if(this_pattern ==
NULL) {
4313 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
4322}
else if(this_pattern !=
NULL) {
4325 while(
isspace((
unsigned char)*cp) ||
isdigit((
unsigned char)*cp)) {
4331 if(this_pattern !=
NULL) {
4336 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg++)
4338 for(this_pattern = list [curr_seg];
4339this_pattern !=
NULL;
4340this_pattern = this_pattern->
next)
4343 if(best [curr_seg] ==
NULL 4346best[curr_seg] = this_pattern;
4352 if(best [curr_seg] !=
NULL) {
4357 if(best [curr_seg] != list [curr_seg]) {
4358this_pattern = list [curr_seg];
4359 while( this_pattern !=
NULL&& this_pattern->
next!= best[curr_seg] ) {
4360this_pattern = this_pattern->
next;
4362 if(this_pattern !=
NULL) {
4374 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
4376 if(best[curr_seg] ==
NULL)
4378 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
4405next_offset = this_offset->
next;
4408 while(lip !=
NULL&& this_offset !=
NULL) {
4409 if(line_count == this_offset->
ival) {
4410 while(lip !=
NULL&&
4411(next_offset ==
NULL 4412|| line_count < next_offset->ival - 1)) {
4415cp += strspn (cp,
" \t\r\n1234567890");
4416 if(cp != lip->
data) {
4417strcpy (lip->
data, cp);
4423this_offset = this_offset->
next;
4424 if(this_offset !=
NULL) {
4425next_offset = this_offset->
next;
4443 EBoolgen_local_ids)
4447 if(afrp ==
NULL) {
4457anchorpattern, afrp, gen_local_ids);
4459 free(anchorpattern);
4473 if(cp_from == cp_to || cp_from ==
NULL|| cp_to ==
NULL) {
4476 while(*cp_to != 0) {
4492 intnum_comment_starts;
4497num_comment_starts = 0;
4499 for(lip = list; lip !=
NULL; lip = lip->
next) {
4504 for(cp = lip->
data; *cp != 0; cp++) {
4506 if(cp_r ==
NULL) {
4508cp = lip->
data- 1;
4512 if(cp_r > lip->
data) {
4514 while(cp_r >= lip->
data&& *cp_r !=
'[') {
4517 if(cp_r < lip->
data) {
4524 if(num_comment_starts > 0) {
4525num_comment_starts --;
4527}
else if(*cp ==
'[') {
4529num_comment_starts ++;
4533 if(num_comment_starts == 0) {
4538}
else if(num_comment_starts > 0) {
4539cp_r = strchr (lip->
data,
'[');
4540 if(cp_r !=
NULL) {
4543in_comment =
eTrue;
4545 if(lip->
data[0] == 0) {
4566 while(lip !=
NULL) {
4570 while(end_lip !=
NULL 4572end_lip = end_lip->
next;
4574 if(end_lip !=
NULL) {
4575 while(start_lip != end_lip) {
4577start_lip = start_lip->
next;
4580lip = end_lip->
next;
4602 TSizeInfoPtrlist_ptr, new_list, best_ptr, return_best;
4605 for(list_ptr = list; list_ptr !=
NULL; list_ptr = list_ptr->
next) {
4606 if(not_this_size != list_ptr->
size_value) {
4613return_best =
NULL;
4614 if(best_ptr !=
NULL) {
4616 if(return_best !=
NULL) {
4640 TSizeInfoPtrlast_line_lengths, sip, sip_next, twoafter;
4641 intbest_last_line_length;
4642 intanchor_line_length;
4644 if(anchorpattern ==
NULL 4646|| line_lengths ==
NULL) {
4650last_line_lengths =
NULL;
4657 for(sip = line_lengths; sip !=
NULL; sip = sip->
next) {
4659sip_next = sip->
next;
4660 if(sip_next !=
NULL 4662&& sip_next->
size_value!= anchor_line_length
4663&& ((twoafter = sip_next->
next) ==
NULL 4664|| twoafter->
size_value!= anchor_line_length))
4672 if(best_last_line_length > 0) {
4691 intnot_this_length;
4693patternlist =
NULL;
4694 for(sip = list; sip !=
NULL; sip = sip->
next) {
4697 if(newpattern ==
NULL) {
4712 if(patternlist ==
NULL) {
4717 for(index = patternlist; index !=
NULL; index = index->
next) {
4731index = patternlist;
4732 while( index !=
NULL&& index->
next!= best ) {
4733index = index->
next;
4735 if(index !=
NULL) {
4740 if(best !=
NULL) {
4748&& popular_line_length !=
NULL 4751not_this_length = popular_line_length->
size_value;
4758|| (popular_line_length !=
NULL 4762 if(best ==
NULL) {
4764 if(best ==
NULL) {
4798 if(list ==
NULL|| anchorpattern ==
NULL) {
4802offset_list =
NULL;
4803 for(sip = list; sip !=
NULL; sip = sip->
next) {
4806 if(new_offset ==
NULL) {
4810 if(offset_list ==
NULL) {
4811offset_list = new_offset;
4837 intline_counter, num_chars;
4839 if(list ==
NULL) {
4844line_counter = line_start +
offset;
4847 while(num_chars < pattern_length
4848&& (next_offset ==
NULL|| line_counter < next_offset->ival)
4852 for(end_offset = 0;
4854&& num_chars < pattern_length
4855&& (next_offset ==
NULL 4856|| line_counter < next_offset->ival);
4863 if(num_chars == pattern_length) {
4864 returnline_start +
offset;
4885 intline_counter, forecast_position, line_skip;
4888 intnum_additional_offsets = 0;
4889 intmax_additional_offsets = 5000;
4891 if(list ==
NULL|| anchorpattern ==
NULL) {
4899 if(pattern_length == 0) {
4903prev_offset =
NULL;
4904next_offset = offset_list;
4907 while(sip !=
NULL&& num_additional_offsets < max_additional_offsets) {
4909 if(next_offset !=
NULL&& line_counter > next_offset->
ival) {
4910next_offset = next_offset->
next;
4911}
else if(next_offset !=
NULL&& line_counter == next_offset->
ival) {
4912skipped_previous =
eFalse;
4913prev_offset = next_offset;
4914next_offset = next_offset->
next;
4917 while(num_chars < pattern_length && sip !=
NULL) {
4922}
else if(skipped_previous) {
4924 while(sip !=
NULL&& line_skip < sip->num_appearances
4925&& num_additional_offsets < max_additional_offsets
4926&& (next_offset ==
NULL 4927|| line_counter < next_offset->ival)) {
4933next_offset, line_skip,
4935 if(forecast_position > 0) {
4937num_additional_offsets++;
4938 if(new_offset ==
NULL) {
4941 if(prev_offset ==
NULL) {
4942new_offset->
next= offset_list;
4943offset_list = new_offset;
4945new_offset->
next= next_offset;
4946prev_offset->
next= new_offset;
4948prev_offset = new_offset;
4953 while(num_chars < pattern_length && sip !=
NULL) {
4956&& num_chars < pattern_length;
4974skipped_previous =
eTrue;
4979 if(num_additional_offsets >= max_additional_offsets)
4982offset_list =
NULL;
4993 intline_counter, best_length;
4997 if(offset_list ==
NULL) {
5002pattern_length_list =
NULL;
5004 if(line_counter != -1) {
5006 offset->ival - line_counter);
5008line_counter =
offset->ival;
5027 intline_diff, num_chars, best_num_chars;
5030 if(token_list ==
NULL|| offset_list ==
NULL|| block_length < 1) {
5035prev_offset =
NULL;
5036 for(new_offset = offset_list;
5037new_offset !=
NULL&& lip !=
NULL;
5038new_offset = new_offset->
next)
5040 if(prev_offset ==
NULL) {
5043line_diff < new_offset->
ival&& lip !=
NULL;
5049 if(prev_offset !=
NULL) {
5052line_diff < new_offset->
ival- prev_offset->
ival 5056 if(line_diff < new_offset->ival - prev_offset->
ival- 1) {
5057num_chars += strlen (lip->
data);
5061 if(new_offset->
ival- prev_offset->
ival== block_length) {
5066prev_offset = new_offset;
5069 if(best_num_chars == 0 && pattern_length_list !=
NULL) {
5070best_num_chars = pattern_length_list->
size_value;
5073pattern_length_list =
NULL;
5074 returnbest_num_chars;
5082 intdesired_num_chars)
5085 size_tnum_chars, total_chars, pattern_length, num_starts;
5087 TIntLinkPtrlength_list, start_list, start_ptr, length;
5088 intstart_of_unknown;
5089 intnum_additional_offsets_needed;
5091 if(list ==
NULL|| distance == 0 || desired_num_chars == 0) {
5100 for(lip = list, line_diff = 0;
5101lip !=
NULL&& line_diff < distance
5102&& total_chars < desired_num_chars;
5103lip = lip->
next, line_diff++) {
5104num_chars = strlen (lip->
data);
5105total_chars += num_chars;
5117 if(lip ==
NULL|| line_diff == distance) {
5122start_of_unknown = line_diff;
5124length_list =
NULL;
5127lip !=
NULL&& line_diff < distance;
5128lip = lip->
next, line_diff++)
5130num_chars = strlen (lip->
data);
5132 if(length_list ==
NULL) {
5133length_list = length;
5135total_chars += num_chars;
5139num_additional_offsets_needed = (total_chars / desired_num_chars);
5140 if(num_additional_offsets_needed == 0) {
5150 for(start_ptr = length_list, line_diff = start_of_unknown;
5151start_ptr !=
NULL&& line_diff < distance
5152&& pattern_length < distance - line_diff ;
5153start_ptr = start_ptr->
next, line_diff++) {
5154num_chars = start_ptr->
ival;
5156length = start_ptr->
next;
5157 while(num_chars < desired_num_chars
5158&& pattern_length + line_diff < distance
5161num_chars += length->
ival;
5163length = length->
next;
5165 if(num_chars == desired_num_chars) {
5167 if(start_list ==
NULL) {
5168start_list = length;
5193 TIntLinkPtrprev_offset, new_offset, splice_offset;
5194 intline_diff, num_chars, line_start;
5196 if(token_list ==
NULL|| offset_list ==
NULL 5197|| block_length < 1 || best_num_chars < 1)
5203prev_offset =
NULL;
5204 for(new_offset = offset_list;
5205new_offset !=
NULL&& lip !=
NULL;
5206new_offset = new_offset->
next) {
5207 if(prev_offset ==
NULL) {
5210line_diff < new_offset->
ival&& lip !=
NULL;
5215 if(new_offset->
ival- prev_offset->
ival== block_length) {
5218line_diff < new_offset->
ival- prev_offset->
ival 5227line_diff < new_offset->
ival- prev_offset->
ival 5228&& lip !=
NULL&& num_chars < best_num_chars;
5230num_chars += strlen (lip->
data);
5239 if(line_diff < new_offset->ival - prev_offset->
ival) {
5240line_start = line_diff + prev_offset->
ival;
5242 while(line_diff < new_offset->ival - prev_offset->
ival 5250 if(splice_offset ==
NULL) {
5253splice_offset->
next= new_offset;
5254prev_offset->
next= splice_offset;
5257new_offset->
ival- splice_offset->
ival,
5262prev_offset = new_offset;
5267line_diff < block_length && lip !=
NULL;
5285 for(cp =
data; *cp != 0; cp++) {
5286 if(
isdigit((
unsigned char)(*cp))) {
5317 for(lip = token_list;
5329anchorpattern [1] =
NULL;
5330 if(anchorpattern [0] ==
NULL|| anchorpattern[0]->lengthrepeats ==
NULL) {
5345 if(best_length < 1 && offset_list != NULL && offset_list->
next!=
NULL) {
5346best_length = offset_list->
next->
ival- offset_list->
ival;
5357anchorpattern, afrp,
eFalse);
5429 for(index = 0; index < afp->
num_deflines; index++) {
5454cp += strspn (
str,
" >\t");
5455 len= strcspn (cp,
" \t\r\n");
5463strncpy (
id, cp,
len);
5482cp += strspn (
str,
" >\t");
5483 len= strcspn (cp,
" \t\r\n");
5488 len= strspn (cp,
" \t\r\n");
5514 if(afrp ==
NULL) {
5540 for(scp = list; scp !=
NULL; scp = scp->
next) {
5564 const char* reason,
5566 void* report_error_userdata)
5568 intbad_line_num, bad_line_offset;
5570 charbad_char, curr_char;
5577data_position = lirp->
data_pos+ 1;
5583bad_line_offset, bad_line_num, reason,
5584report_error, report_error_userdata);
5585 returndata_position;
5606 void* report_error_userdata)
5610 intmiddle_start = 0;
5612 charcurr_char, master_char;
5613 EBoolfound_middle_start;
5615 EBoolmatch_not_in_beginning_gap;
5616 EBoolmatch_not_in_end_gap;
5618 charbeginning_gap =
'-';
5619 charmiddle_gap =
'-';
5620 charend_gap =
'-';
5632 if(strlen(sip->
end_gap) > 0 &&
5638 if(arsp ==
NULL|| master_arsp ==
NULL|| sip ==
NULL) {
5642 if(lirp ==
NULL) {
5645 if(arsp != master_arsp) {
5647 if(master_lirp ==
NULL) {
5652master_lirp =
NULL;
5657match_not_in_beginning_gap =
eTrue;
5659match_not_in_beginning_gap =
eFalse;
5663match_not_in_end_gap =
eTrue;
5665match_not_in_end_gap =
eFalse;
5671found_middle_start =
eFalse;
5674 while(curr_char != 0) {
5676 if(! found_middle_start) {
5677middle_start = data_position;
5678found_middle_start =
eTrue;
5680middle_end = data_position + 1;
5682}
else if(! found_middle_start) {
5683 if(match_not_in_beginning_gap
5684&& strchr (sip->
match, curr_char) !=
NULL)
5686middle_start = data_position;
5687found_middle_start =
eTrue;
5688middle_end = data_position + 1;
5695 "expect only beginning gap characters here",
5696report_error, report_error_userdata);
5703 if(match_not_in_end_gap
5704&& strchr (sip->
match, curr_char) !=
NULL)
5706middle_end = data_position + 1;
5713 if(! found_middle_start) {
5715 if(num_segments > 1)
5722report_error, report_error_userdata);
5729data_position = middle_start;
5730 while(data_position < middle_end)
5733 while(data_position < middle_end
5738 if(curr_char == 0 || data_position >= middle_end) {
5740}
else if(strchr (sip->
missing, curr_char) !=
NULL) {
5743}
else if(strchr (sip->
match, curr_char) !=
NULL) {
5745 if(master_char == 0) {
5747 if(master_arsp == arsp) {
5750 "can't specify match chars in first sequence",
5751report_error, report_error_userdata);
5755 "can't find source for match chars",
5756report_error, report_error_userdata);
5770 "expect only sequence, missing, match," 5771 " and middle gap characters here",
5772report_error, report_error_userdata);
5778data_position = middle_end;
5780 while(curr_char != 0) {
5781 if(strchr (sip->
end_gap, curr_char) ==
NULL) {
5784 "expect only end gap characters here",
5785report_error, report_error_userdata);
5872* sizeof (
char*));
5898* sizeof (
char*));
5913* sizeof (
char*));
5921 for(lip = afrp->
deflines, index = 0;
5922lip !=
NULL&& index < afp->num_deflines;
5923lip = lip->
next, index++) {
5930 while(index < afp->num_deflines) {
5935 for(lip = afrp->
organisms, index = 0;
5936lip !=
NULL&& index < afp->num_organisms;
5937lip = lip->
next, index++) {
5943 if(lengths ==
NULL) {
5948 if(best_length ==
NULL) {
5953 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++) {
5954lengths [curr_seg] =
NULL;
5955best_length [curr_seg] = 0;
5960 for(arsp = afrp->
sequences, index = 0;
5961arsp !=
NULL&& index < afp->num_sequences;
5962arsp = arsp->
next, index++) {
5975 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
5978 if(best_length [curr_seg] == 0 && lengths [curr_seg] !=
NULL) {
5979best_length [curr_seg] = lengths [curr_seg]->
size_value;
5989}
else if((
int) strlen (afp->
sequences[index]) != best_length [curr_seg]) {
6019 for(curr_seg = 0; curr_seg < afrp->
num_segments; curr_seg ++)
6045 void* fileuserdata,
6047 void* erroruserdata,
6049 EBooluse_nexus_file_info,
6050 EBoolgen_local_ids)
6061 if(use_nexus_file_info != 0)
6068errfunc, erroruserdata, &
format);
6069 if(afrp ==
NULL) {
6077afrp, gen_local_ids);
6098 void* fileuserdata,
6100 void* erroruserdata,
6102 EBooluse_nexus_file_info)
6105sequence_info, use_nexus_file_info,
eFalse);
6111 void* fileuserdata,
6113 void* erroruserdata,
6123 void* fileuserdata,
6125 void* erroruserdata,
6127 EBoolgen_local_ids)
6130sequence_info,
eFalse, gen_local_ids);
static EBool s_SkippableString(char *str)
struct SAlignRawSeq SAlignRawSeq
static int s_ForecastPattern(int line_start, int pattern_length, TIntLinkPtr next_offset, int sip_offset, TSizeInfoPtr list)
void AlignmentFileFree(TAlignmentFilePtr afp)
static void s_CreateSequencesBasedOnTokenPatterns(TLineInfoPtr token_list, TIntLinkPtr offset_list, SLengthListPtr *anchorpattern, SAlignRawFilePtr afrp, EBool gen_local_ids)
static TLineInfoPtr s_LineInfoNew(const char *string, int line_num, int line_offset)
static void s_ProcessAlignFileRawByLengthPattern(SAlignRawFilePtr afrp)
static void s_BracketedCommentListFree(TBracketedCommentListPtr list)
TAlignmentFilePtr AlignmentFileNew(void)
static void s_RemoveBasePairCountCommentsFromData(SAlignRawFilePtr afrp)
static int s_FindAlignRawSeqOffsetById(TAlignRawSeqPtr list, char *id)
static char * s_ReplaceNexusTypeChar(char *str, char c)
static int s_ForecastBlockPattern(SLengthListPtr pattern_list, TIntLinkPtr next_offset, int line_start, int block_size)
static void s_AddDeflineFromOrganismLine(char *defline, int line_num, int defline_offset, SAlignRawFilePtr afrp)
static TLineInfoPtr s_BuildTokenList(TLineInfoPtr line_list)
static void s_ReportDuplicateIDError(char *id, int line_num, FReportErrorFunction report_error, void *report_error_userdata)
static void s_ReportIncorrectNumberOfSequences(int num_expected, int num_found, FReportErrorFunction report_error, void *report_error_userdata)
struct SLineInfoReader SLineInfoReader
TErrorInfoPtr ErrorInfoNew(TErrorInfoPtr list)
struct SAlignFileRaw SAlignRawFileData
TAlignmentFilePtr ReadAlignmentFile2(FReadLineFunction readfunc, void *fileuserdata, FReportErrorFunction errfunc, void *erroruserdata, TSequenceInfoPtr sequence_info, EBool gen_local_ids)
static EBool s_AfrpInitLineData(SAlignRawFilePtr afrp, FReadLineFunction readfunc, void *pfile)
static void s_ReportMissingOrganismInfo(FReportErrorFunction report_error, void *report_error_userdata)
static EBool s_SizeInfoIsEqual(TSizeInfoPtr s1, TSizeInfoPtr s2)
static const size_t kMaxPrintedIntLen
static EBool s_IsBlank(char *str)
static char * s_GetAlignRawSeqIDByOffset(TAlignRawSeqPtr list, int offset)
static void s_ReportBlockLengthError(const char *id, int line_num, int expected_num, int actual_num, FReportErrorFunction report_error, void *report_error_userdata)
static EBool s_CheckNexusCharInfo(const char *str, TSequenceInfoPtr sequence_info, FReportErrorFunction errfunc, void *errdata)
static char * s_GetIdFromString(char *str)
static void s_IntLinkFree(TIntLinkPtr ilp)
static void s_ReportUnusedLine(int line_num_start, int line_num_stop, TLineInfoPtr line_val, FReportErrorFunction errfunc, void *errdata)
static void s_AfrpProcessFastaGap(SAlignRawFilePtr afrp, SLengthListPtr *patterns, EBool *last_line_was_marked_id, char *plinestr, int overall_line_count)
static EBool s_IsAlnFormatString(char *str)
TAlignmentFilePtr ReadAlignmentFileEx2(FReadLineFunction readfunc, void *fileuserdata, FReportErrorFunction errfunc, void *erroruserdata, TSequenceInfoPtr sequence_info, EBool use_nexus_file_info, EBool gen_local_ids)
static TLineInfoReaderPtr s_LineInfoReaderNew(TLineInfoPtr line_list)
static int s_StringNICmp(const char *str1, const char *str2, int cmp_count)
static int s_GetBestCharacterLength(TLineInfoPtr token_list, TIntLinkPtr offset_list, int block_length)
static void s_ReportOrgCommentError(char *linestring, FReportErrorFunction errfunc, void *errdata)
static void s_GetFASTAExpectedNumbers(char *str, SAlignRawFilePtr afrp)
static void s_LengthListFree(SLengthListPtr llp)
static TStringCountPtr s_StringCountNew(TStringCountPtr list)
TSequenceInfoPtr SequenceInfoNew(void)
static void s_FindInterleavedBlocks(SLengthListPtr pattern_list, SAlignRawFilePtr afrp)
static int s_CountCharactersBetweenOffsets(TLineInfoPtr list, int distance, int desired_num_chars)
static TIntLinkPtr GetSegmentOffsetList(TBracketedCommentListPtr comment_list)
static EBool s_DoesBlockHaveIds(SAlignRawFilePtr afrp, TLineInfoPtr first_line, int num_lines_in_block)
static void s_RemoveCommentsFromBlock(TLineInfoPtr first_line, int num_lines_in_block)
TAlignmentFilePtr ReadAlignmentFileEx(FReadLineFunction readfunc, void *fileuserdata, FReportErrorFunction errfunc, void *erroruserdata, TSequenceInfoPtr sequence_info, EBool use_nexus_file_info)
static void s_ReportInconsistentID(char *id, int line_number, FReportErrorFunction report_error, void *report_error_userdata)
static TSizeInfoPtr s_SizeInfoNew(TSizeInfoPtr list)
static SLengthListPtr * s_CreateAnchorPatternForMarkedIDs(SAlignRawFilePtr afrp)
static void s_ReportBadSequenceLength(char *id, int expected_length, int actual_length, FReportErrorFunction report_error, void *report_error_userdata)
static int s_ReportRepeatedBadCharsInSequence(TLineInfoReaderPtr lirp, char *id, const char *reason, FReportErrorFunction report_error, void *report_error_userdata)
static void s_ReportRepeatedId(TStringCountPtr scp, FReportErrorFunction report_error, void *report_error_userdata)
struct SLineInfo SLineInfo
static void s_ProcessBlockLines(SAlignRawFilePtr afrp, TLineInfoPtr lines, int num_lines_in_block, EBool first_block)
static int s_CountSequencesInBracketedComment(TBracketedCommentListPtr comment)
static EBool s_IsASN1(char *linestring)
static int s_LineInfoReaderGetCurrentLineNumber(TLineInfoReaderPtr lirp)
static void s_ReportASN1Error(FReportErrorFunction errfunc, void *errdata)
static TIntLinkPtr s_CreateOffsetList(TSizeInfoPtr list, SLengthListPtr anchorpattern)
static void s_ReportBadCharError(char *id, char bad_char, int num_bad, int offset, int line_number, const char *reason, FReportErrorFunction errfunc, void *errdata)
static void s_RemoveOrganismCommentFromLine(char *string)
static void s_ProcessAlignFileRawForMarkedIDs(SAlignRawFilePtr afrp, EBool gen_local_ids)
static EBool s_AddAlignRawSeqByIndex(TAlignRawSeqPtr list, int index, char *data, int data_line_num, int data_line_offset)
static void s_InsertNewOffsets(TLineInfoPtr token_list, TIntLinkPtr offset_list, int block_length, int best_num_chars, char *alphabet)
static EBool s_DoLengthPatternsMatch(SLengthListPtr llp1, SLengthListPtr llp2)
static int s_StringICmp(const char *str1, const char *str2)
static char * s_GetDeflineFromIdString(char *str)
static TAlignRawSeqPtr s_FindAlignRawSeqById(TAlignRawSeqPtr list, char *id)
static EBool s_IsOrganismComment(TCommentLocPtr clp)
struct SCommentLoc * TCommentLocPtr
static TLineInfoPtr s_RemoveCommentsFromTokens(TLineInfoPtr list)
TAlignmentFilePtr ReadAlignmentFile(FReadLineFunction readfunc, void *fileuserdata, FReportErrorFunction errfunc, void *erroruserdata, TSequenceInfoPtr sequence_info)
static EBool s_IsConsensusLine(char *str)
static SLengthListPtr s_GetBlockPattern(const char *cp)
static TAlignmentFilePtr s_ConvertDataToOutput(SAlignRawFilePtr afrp, TSequenceInfoPtr sip)
static EBool s_FindBadDataCharsInSequence(TAlignRawSeqPtr arsp, TAlignRawSeqPtr master_arsp, TSequenceInfoPtr sip, int num_segments, FReportErrorFunction report_error, void *report_error_userdata)
static EBool s_FoundStopLine(char *linestring)
static SLengthListPtr s_AddLengthList(SLengthListPtr list, SLengthListPtr llp)
struct SAlignFileRaw * SAlignRawFilePtr
struct SIntLink * TIntLinkPtr
static TIntLinkPtr s_AugmentBlockPatternOffsetList(SLengthListPtr pattern_list, TIntLinkPtr offset_list, int block_size)
static int s_GetNumSegmentsInAlignment(TBracketedCommentListPtr comment_list, FReportErrorFunction errfunc, void *errdata)
static TIntLinkPtr s_IntLinkNew(int ival, TIntLinkPtr list)
static SAlignRawFilePtr s_AlignFileRawNew(void)
static EBool s_GetOneNexusSizeComment(const char *str, const char *valname, int *val)
static int s_GetMostPopularSize(TSizeInfoPtr list)
struct SSizeInfo SSizeInfo
static void s_ReportLineLengthError(char *id, TLineInfoPtr lip, int expected_length, FReportErrorFunction report_error, void *report_error_userdata)
static TSizeInfoPtr s_FindMostFrequentlyOccurringTokenLength(TSizeInfoPtr list, int not_this_size)
static void s_ReportInconsistentBlockLine(char *id, int line_number, FReportErrorFunction report_error, void *report_error_userdata)
static SLengthListPtr s_LengthListNew(SLengthListPtr list)
static void s_LineInfoReaderAdvancePastSpace(TLineInfoReaderPtr lirp)
static TIntLinkPtr s_AugmentOffsetList(TIntLinkPtr offset_list, TSizeInfoPtr list, SLengthListPtr anchorpattern)
static EBool s_SkippableNexusComment(char *str)
static EBool s_IsOnlyNumbersAndSpaces(char *str)
static void s_BracketedCommentListAddLine(TBracketedCommentListPtr comment, const char *string, int line_num, int line_offset)
static int s_LineInfoReaderGetCurrentLineOffset(TLineInfoReaderPtr lirp)
static EBool s_BlockIsConsistent(SAlignRawFilePtr afrp, TLineInfoPtr first_line, int num_lines_in_block, EBool has_ids, EBool first_block)
static void s_ReportSegmentedAlignmentError(TIntLinkPtr offset_list, FReportErrorFunction errfunc, void *errdata)
static TSizeInfoPtr s_AddSizeInfo(TSizeInfoPtr list, int size_value)
struct SStringCount * TStringCountPtr
static void s_AlignFileRawFree(SAlignRawFilePtr afrp)
static void s_GetNexusSizeComments(const char *str, EBool *found_ntax, EBool *found_nchar, SAlignRawFilePtr afrp)
static void s_AddLengthRepeat(SLengthListPtr llp, int size_value)
static void s_ReportMissingSequenceData(char *id, FReportErrorFunction report_error, void *report_error_userdata)
static char s_FindNthDataChar(TLineInfoReaderPtr lirp, int pos)
static TCommentLocPtr s_FindOrganismComment(char *string)
struct SCommentLoc SCommentLoc
static char s_GetNexusTypechar(const char *str, const char *val_name)
static EBool s_s_FindBadDataCharsInSequenceList(SAlignRawFilePtr afrp, TSequenceInfoPtr sip)
static void s_AlignRawSeqFree(TAlignRawSeqPtr arsp)
static void s_LineInfoReaderFree(TLineInfoReaderPtr lirp)
static TAlignRawSeqPtr s_AddAlignRawSeqById(TAlignRawSeqPtr list, char *id, char *data, int id_line_num, int data_line_num, int data_line_offset)
static TAlignRawSeqPtr s_AlignRawSeqNew(TAlignRawSeqPtr list)
static TCommentLocPtr s_FindComment(char *string)
struct SLengthList * SLengthListPtr
static char * s_CreateOrderedOrgName(TCommentLocPtr org_clp)
static EBool s_FindUnusedLines(SLengthListPtr pattern_list, SAlignRawFilePtr afrp)
static TLineInfoPtr s_RemoveNexusCommentsFromTokens(TLineInfoPtr list)
static void s_ProcessAlignRawFileByBlockOffsets(SAlignRawFilePtr afrp)
static void s_CommentLocFree(TCommentLocPtr clp)
void ErrorInfoFree(TErrorInfoPtr eip)
static void s_LineInfoFree(TLineInfoPtr lip)
static void s_ExtendAnchorPattern(SLengthListPtr anchorpattern, TSizeInfoPtr line_lengths)
static TStringCountPtr s_AddStringCount(char *string, int line_num, TStringCountPtr list)
static int s_GetMostPopularPatternLength(TIntLinkPtr offset_list)
struct SAlignRawSeq * TAlignRawSeqPtr
static void s_ReadOrgNamesFromText(char *string, int line_num, SAlignRawFilePtr afrp)
static void s_TrimSpace(char **ppline)
static SLengthListPtr s_FindMostPopularPattern(TSizeInfoPtr list)
static char * s_LineInfoMergeAndStripSpaces(TLineInfoPtr list)
static void s_StringLeftShift(char *cp_from, char *cp_to)
struct SLineInfoReader * TLineInfoReaderPtr
struct SBracketedCommentList SBracketedCommentList
static EBool s_ContainsDigits(char *data)
static void s_LineInfoReaderReset(TLineInfoReaderPtr lirp)
struct SBracketedCommentList * TBracketedCommentListPtr
struct SLineInfo * TLineInfoPtr
static void s_ReportBadNumSegError(int line_num, int num_seg, int num_seg_exp, FReportErrorFunction errfunc, void *errdata)
static TLineInfoPtr s_DeleteLineInfos(TLineInfoPtr list)
static EBool s_ReprocessIds(SAlignRawFilePtr afrp)
struct SSizeInfo * TSizeInfoPtr
static TSizeInfoPtr s_AddSizeInfoAppearances(TSizeInfoPtr list, int size_value, int num_appearances)
struct SStringCount SStringCount
static void s_StringCountFree(TStringCountPtr list)
static TCommentLocPtr s_CreateOrderedOrgCommentList(TCommentLocPtr org_clp)
static char * s_TokenizeString(char *str, const char *delimiter, char **last)
static TBracketedCommentListPtr s_BracketedCommentListNew(TBracketedCommentListPtr list, const char *string, int line_num, int line_offset)
#define kMaxPrintedIntLenPlusOne
static void s_SizeInfoFree(TSizeInfoPtr list)
static EBool s_IsTwoNumbersSeparatedBySpace(const char *str)
void SequenceInfoFree(TSequenceInfoPtr sip)
static SAlignRawFilePtr s_ReadAlignFileRaw(FReadLineFunction readfunc, void *userdata, TSequenceInfoPtr sequence_info, EBool use_nexus_file_info, FReportErrorFunction errfunc, void *errdata, EAlignFormat *pformat)
static void s_ReportIncorrectSequenceLength(int len_expected, int len_found, FReportErrorFunction report_error, void *report_error_userdata)
static TSizeInfoPtr s_GetMostPopularSizeInfo(TSizeInfoPtr list)
static EBool s_UpdateNexusCharInfo(const char *str, TSequenceInfoPtr sequence_info)
static void s_RemoveCommentFromLine(char *linestring)
static TLineInfoPtr s_AddLineInfo(TLineInfoPtr list, const char *string, int line_num, int line_offset)
static void s_ReportCharCommentError(const char *expected, char seen, const char *val_name, FReportErrorFunction errfunc, void *errdata)
struct SLengthList SLengthListData
char *(* FReadLineFunction)(void *userdata)
void(* FReportErrorFunction)(TErrorInfoPtr err_ptr, void *userdata)
struct SSequenceInfo * TSequenceInfoPtr
struct SErrorInfo * TErrorInfoPtr
struct SAlignmentFile * TAlignmentFilePtr
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static unsigned int line_num
static const char * expected[]
static const char * str(char *buf, int n)
The blob sat and sat key Both must be positive integers</td > n< td > Non empty string The interpretation of the blob id depends on a processor Cassandra n processor expects the following format
int strncmp(const char *str1, const char *str2, size_t count)
int strcmp(const char *str1, const char *str2)
static const char delimiter[]
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
int expected_num_sequence
TAlignRawSeqPtr sequences
FReportErrorFunction report_error
int expected_sequence_len
void * report_error_userdata
TLineInfoPtr sequence_data
struct SAlignRawSeq * next
struct SLengthList * next
TSizeInfoPtr lengthrepeats
struct SStringCount * next
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4