git: 39e57d44aa5a - main - audio/festival: fix crash due to overly aggressive optimization

From: Mikhail Teterin <mi_at_FreeBSD.org>
Date: Tue, 21 Feb 2023 07:26:04 UTC
The branch main has been updated by mi:

URL: https://cgit.FreeBSD.org/ports/commit/?id=39e57d44aa5af331426f14f1a54c07d6912a7b91

commit 39e57d44aa5af331426f14f1a54c07d6912a7b91
Author:     Mikhail Teterin <mi@FreeBSD.org>
AuthorDate: 2023-02-21 07:23:22 +0000
Commit:     Mikhail Teterin <mi@FreeBSD.org>
CommitDate: 2023-02-21 07:23:22 +0000

    audio/festival: fix crash due to overly aggressive optimization
    
    Modern clang, optimize away checks like "if (this == NULL)", which
    are, indeed, redundant in good C++ code. Unfortunately, festival's
    code is not too good, and the checks are essential...
    
    PR:     269583
    
    While here, eliminate some of the other warnings raised by both
    compiler and valgrind, and adapt one more patch from Debian.
    
    Bump PORTREVISION.
---
 audio/festival/Makefile                            |   8 +-
 audio/festival/files/patch-hts-buffer-bounds-check | 313 +++++++++++++++++++++
 audio/festival/files/patch-warnings                |  76 +++++
 3 files changed, 393 insertions(+), 4 deletions(-)

diff --git a/audio/festival/Makefile b/audio/festival/Makefile
index 3c143deffb14..05001c346ef7 100644
--- a/audio/festival/Makefile
+++ b/audio/festival/Makefile
@@ -1,6 +1,6 @@
 PORTNAME=	festival
 PORTVERSION=	2.4
-PORTREVISION=	1
+PORTREVISION=	2
 CATEGORIES=	audio accessibility
 MASTER_SITES=	FESTIVAL
 DISTFILES=	${DISTNAME}-release.tar.gz	\
@@ -27,13 +27,13 @@ WWW=		https://www.cstr.ed.ac.uk/projects/festival/
 OPTIONS_DEFINE=	NAS
 OPTIONS_DEFAULT=NAS
 
-CXXFLAGS+=	-DFTLIBDIR=${LOCALBASE}/share/festival/lib
+CXXFLAGS+=	-DFTLIBDIR=${LOCALBASE}/share/festival/lib -fno-delete-null-pointer-checks
 CONFIGURE_WRKSRC=${WRKDIR}/festival
 USES=		gmake
 SPEECHTOOLS=	${WRKSRC}/speech_tools
 FESTIVAL=	${WRKSRC}/festival
-MAKE_ARGS+=	CC="${CC}" GCC="${CC}" \
-		CXX="${CXX}" GXX="${CXX}" \
+MAKE_ARGS+=	CC="${CCACHE_BIN} ${CC}" GCC="${CCACHE_BIN} ${CC}" \
+		CXX="${CCACHE_BIN} ${CXX}" GXX="${CCACHE_BIN} ${CXX}" \
 		EST_HOME=${SPEECHTOOLS}
 WRKSRC=		${WRKDIR}
 
diff --git a/audio/festival/files/patch-hts-buffer-bounds-check b/audio/festival/files/patch-hts-buffer-bounds-check
new file mode 100644
index 000000000000..0cb94a0aa357
--- /dev/null
+++ b/audio/festival/files/patch-hts-buffer-bounds-check
@@ -0,0 +1,313 @@
+Obtained from Debian. Original description follows:
+
+Description: HTS engine does not check buffer bounds in some functions.
+This patch adds bounds checking to prevent writing past the end of the buffer.
+
+Author: Peter Drysdale <drysdalepete@gmail.com>
+
+--- festival/src/modules/hts_engine/HTS_engine.c
++++ festival/src/modules/hts_engine/HTS_engine.c
+@@ -467,7 +467,7 @@
+ }
+ 
+ /* HTS_Engine_synthesize_from_strings: synthesize speech from strings */
+-HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
++HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, const char **lines, size_t num_lines)
+ {
+    HTS_Engine_refresh(engine);
+    HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
+--- festival/src/modules/hts_engine/HTS_engine.h
++++ festival/src/modules/hts_engine/HTS_engine.h
+@@ -427,7 +427,7 @@
+ HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn);
+ 
+ /* HTS_Engine_synthesize_from_strings: synthesize speech from string list */
+-HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
++HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, const char **lines, size_t num_lines);
+ 
+ /* HTS_Engine_save_information: save trace information */
+ void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp);
+--- festival/src/modules/hts_engine/HTS_hidden.h
++++ festival/src/modules/hts_engine/HTS_hidden.h
+@@ -117,16 +117,16 @@
+ size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp);
+ 
+ /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
+-HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff);
++HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff, size_t bufflen);
+ 
+ /* HTS_get_token: get token from file pointer (separators are space,tab,line break) */
+-HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff);
++HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff, size_t bufflen);
+ 
+ /* HTS_get_token: get token from file pointer with specified separator */
+ HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator);
+ 
+ /* HTS_get_token_from_string: get token from string (separator are space,tab,line break) */
+-HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff);
++HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff, size_t bufflen);
+ 
+ /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
+ HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator);
+@@ -248,7 +248,7 @@
+ void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn);
+ 
+ /* HTS_Label_load_from_strings: load label list from string list */
+-void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines);
++void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char **lines, size_t num_lines);
+ 
+ /* HTS_Label_get_size: get number of label string */
+ size_t HTS_Label_get_size(HTS_Label * label);
+--- festival/src/modules/hts_engine/HTS_misc.c
++++ festival/src/modules/hts_engine/HTS_misc.c
+@@ -333,7 +333,7 @@
+ }
+ 
+ /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
+-HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff)
++HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff, size_t bufflen)
+ {
+    char c;
+    size_t i;
+@@ -369,7 +369,7 @@
+    }
+ 
+    i = 0;
+-   while (1) {
++   while (i<bufflen) {
+       buff[i++] = c;
+       c = HTS_fgetc(fp);
+       if (squote && c == '\'')
+@@ -386,12 +386,16 @@
+       }
+    }
+ 
++   if (i == bufflen) {
++      HTS_error(2,"HTS_get_pattern_token: Buffer overflow.\n");
++   }
++
+    buff[i] = '\0';
+    return TRUE;
+ }
+ 
+ /* HTS_get_token: get token from file pointer (separators are space, tab, and line break) */
+-HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff)
++HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff, size_t bufflen)
+ {
+    char c;
+    size_t i;
+@@ -407,7 +411,7 @@
+          return FALSE;
+    }
+ 
+-   for (i = 0; c != ' ' && c != '\n' && c != '\t';) {
++   for (i = 0; c != ' ' && c != '\n' && c != '\t'  && (i<bufflen);) {
+       buff[i++] = c;
+       if (HTS_feof(fp))
+          break;
+@@ -416,6 +420,10 @@
+          break;
+    }
+ 
++   if (i == bufflen) {
++      HTS_error(2,"HTS_get_token: Buffer overflow.\n");
++   }
++
+    buff[i] = '\0';
+    return TRUE;
+ }
+@@ -451,7 +459,7 @@
+ }
+ 
+ /* HTS_get_token_from_string: get token from string (separators are space, tab, and line break) */
+-HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff)
++HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff, size_t bufflen)
+ {
+    char c;
+    size_t i;
+@@ -467,11 +475,15 @@
+          return FALSE;
+       c = string[(*index)++];
+    }
+-   for (i = 0; c != ' ' && c != '\n' && c != '\t' && c != '\0'; i++) {
++   for (i = 0; c != ' ' && c != '\n' && c != '\t' && c != '\0' && (i<bufflen); i++) {
+       buff[i] = c;
+       c = string[(*index)++];
+    }
+ 
++   if (i == bufflen) {
++      HTS_error(2,"HTS_get_token_from_string: Buffer overflow.\n");
++   }
++
+    buff[i] = '\0';
+    return TRUE;
+ }
+@@ -480,7 +492,7 @@
+ HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator)
+ {
+    char c;
+-   size_t start;
++   /*size_t start;*/
+    size_t len = 0;
+ 
+    if (str == NULL)
+@@ -495,7 +507,7 @@
+       (*index)++;
+       c = str[(*index)];
+    }
+-   start = (*index);
++   /*start = (*index);*/
+    while (c != separator && c != '\0') {
+       buff[len++] = c;
+       (*index)++;
+--- festival/src/modules/hts_engine/HTS_model.c
++++ festival/src/modules/hts_engine/HTS_model.c
+@@ -194,12 +194,12 @@
+    HTS_Question_clear(question);
+ 
+    /* get question name */
+-   if (HTS_get_pattern_token(fp, buff) == FALSE)
++   if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE)
+       return FALSE;
+    question->string = HTS_strdup(buff);
+ 
+    /* get pattern list */
+-   if (HTS_get_pattern_token(fp, buff) == FALSE) {
++   if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+       HTS_Question_clear(question);
+       return FALSE;
+    }
+@@ -207,7 +207,7 @@
+    last_pattern = NULL;
+    if (strcmp(buff, "{") == 0) {
+       while (1) {
+-         if (HTS_get_pattern_token(fp, buff) == FALSE) {
++         if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+             HTS_Question_clear(question);
+             return FALSE;
+          }
+@@ -218,7 +218,7 @@
+             question->head = pattern;
+          pattern->string = HTS_strdup(buff);
+          pattern->next = NULL;
+-         if (HTS_get_pattern_token(fp, buff) == FALSE) {
++         if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+             HTS_Question_clear(question);
+             return FALSE;
+          }
+@@ -358,7 +358,7 @@
+    if (tree == NULL || fp == NULL)
+       return FALSE;
+ 
+-   if (HTS_get_pattern_token(fp, buff) == FALSE) {
++   if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+       HTS_Tree_clear(tree);
+       return FALSE;
+    }
+@@ -367,14 +367,14 @@
+    tree->root = last_node = node;
+ 
+    if (strcmp(buff, "{") == 0) {
+-      while (HTS_get_pattern_token(fp, buff) == TRUE && strcmp(buff, "}") != 0) {
++      while (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == TRUE && strcmp(buff, "}") != 0) {
+          node = HTS_Node_find(last_node, atoi(buff));
+          if (node == NULL) {
+             HTS_error(0, "HTS_Tree_load: Cannot find node %d.\n", atoi(buff));
+             HTS_Tree_clear(tree);
+             return FALSE;
+          }
+-         if (HTS_get_pattern_token(fp, buff) == FALSE) {
++         if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+             HTS_Tree_clear(tree);
+             return FALSE;
+          }
+@@ -389,7 +389,7 @@
+          HTS_Node_initialize(node->yes);
+          HTS_Node_initialize(node->no);
+ 
+-         if (HTS_get_pattern_token(fp, buff) == FALSE) {
++         if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+             node->quest = NULL;
+             free(node->yes);
+             free(node->no);
+@@ -403,7 +403,7 @@
+          node->no->next = last_node;
+          last_node = node->no;
+ 
+-         if (HTS_get_pattern_token(fp, buff) == FALSE) {
++         if (HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN) == FALSE) {
+             node->quest = NULL;
+             free(node->yes);
+             free(node->no);
+@@ -495,7 +495,7 @@
+    win->coefficient = (double **) HTS_calloc(win->size, sizeof(double *));
+    /* set delta coefficents */
+    for (i = 0; i < win->size; i++) {
+-      if (HTS_get_token_from_fp(fp[i], buff) == FALSE) {
++      if (HTS_get_token_from_fp(fp[i], buff, HTS_MAXBUFLEN) == FALSE) {
+          result = FALSE;
+          fsize = 1;
+       } else {
+@@ -508,7 +508,7 @@
+       /* read coefficients */
+       win->coefficient[i] = (double *) HTS_calloc(fsize, sizeof(double));
+       for (j = 0; j < fsize; j++) {
+-         if (HTS_get_token_from_fp(fp[i], buff) == FALSE) {
++         if (HTS_get_token_from_fp(fp[i], buff, HTS_MAXBUFLEN) == FALSE) {
+             result = FALSE;
+             win->coefficient[i][j] = 0.0;
+          } else {
+@@ -610,7 +610,7 @@
+    last_question = NULL;
+    last_tree = NULL;
+    while (!HTS_feof(fp)) {
+-      HTS_get_pattern_token(fp, buff);
++      HTS_get_pattern_token(fp, buff, HTS_MAXBUFLEN);
+       /* parse questions */
+       if (strcmp(buff, "QS") == 0) {
+          question = (HTS_Question *) HTS_calloc(1, sizeof(HTS_Question));
+--- festival/src/modules/hts_engine/HTS_label.c
++++ festival/src/modules/hts_engine/HTS_label.c
+@@ -117,5 +117,5 @@
+ 
+    /* parse label file */
+-   while (HTS_get_token_from_fp(fp, buff)) {
++   while (HTS_get_token_from_fp(fp, buff, HTS_MAXBUFLEN)) {
+       if (!isgraph((int) buff[0]))
+          break;
+@@ -130,9 +130,9 @@
+       }
+       if (isdigit_string(buff)) {       /* has frame infomation */
+          start = atof(buff);
+-         HTS_get_token_from_fp(fp, buff);
++         HTS_get_token_from_fp(fp, buff, HTS_MAXBUFLEN);
+          end = atof(buff);
+-         HTS_get_token_from_fp(fp, buff);
++         HTS_get_token_from_fp(fp, buff, HTS_MAXBUFLEN);
+          lstring->start = rate * start;
+          lstring->end = rate * end;
+       } else {
+@@ -154,7 +154,7 @@
+ }
+ 
+ /* HTS_Label_load_from_strings: load label from strings */
+-void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines)
++void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char **lines, size_t num_lines)
+ {
+    char buff[HTS_MAXBUFLEN];
+    HTS_LabelString *lstring = NULL;
+@@ -182,11 +182,11 @@
+       }
+       data_index = 0;
+       if (isdigit_string(lines[i])) {   /* has frame infomation */
+-         HTS_get_token_from_string(lines[i], &data_index, buff);
++         HTS_get_token_from_string(lines[i], &data_index, buff, HTS_MAXBUFLEN);
+          start = atof(buff);
+-         HTS_get_token_from_string(lines[i], &data_index, buff);
++         HTS_get_token_from_string(lines[i], &data_index, buff, HTS_MAXBUFLEN);
+          end = atof(buff);
+-         HTS_get_token_from_string(lines[i], &data_index, buff);
++         HTS_get_token_from_string(lines[i], &data_index, buff, HTS_MAXBUFLEN);
+          lstring->name = HTS_strdup(buff);
+          lstring->start = rate * start;
+          lstring->end = rate * end;
diff --git a/audio/festival/files/patch-warnings b/audio/festival/files/patch-warnings
new file mode 100644
index 000000000000..228b3a270848
--- /dev/null
+++ b/audio/festival/files/patch-warnings
@@ -0,0 +1,76 @@
+Address some of the warnings flagged by either compiler or valgrind.
+
+	-mi
+--- speech_tools/stats/EST_Discrete.cc	2010-11-05 10:12:43.000000000 -0400
++++ speech_tools/stats/EST_Discrete.cc	2023-02-20 22:17:06.842236000 -0500
+@@ -152,5 +152,5 @@
+     for (i=0; i<next_free; i++)
+ 	delete discretes[i];
+-    delete discretes;
++    delete[] discretes;
+ }
+ 
+--- festival/src/modules/hts_engine/fest2hts_engine.cc	2013-02-18 10:10:52.000000000 -0500
++++ festival/src/modules/hts_engine/fest2hts_engine.cc	2023-02-20 22:55:59.303248000 -0500
+@@ -191,16 +191,16 @@
+   char *copyright[] = { HTS_COPYRIGHT };
+ 
+-  sprintf(str,
++  str += sprintf(str,
+            "\nThe HMM-Based Speech Synthesis Engine \"hts_engine API\"\n");
+ 
+-  sprintf(str,
+-           "%shts_engine API version %s (%s)\n", str, version, url);
++  str += sprintf(str,
++           "hts_engine API version %s (%s)\n", version, url);
+   for (i = 0; i < nCopyright; i++) {
+     if (i == 0)
+-      sprintf(str,
+-               "%sCopyright (C) %s\n", str, copyright[i]);
++      str += sprintf(str,
++               "Copyright (C) %s\n", copyright[i]);
+     else
+-      sprintf(str,
+-               "%s              %s\n", str, copyright[i]);
++      str += sprintf(str,
++               "              %s\n", copyright[i]);
+   }
+   sprintf(str, "%sAll rights reserved.\n", str);
+--- speech_tools/speech_class/EST_wave_io.cc	2013-10-14 17:54:33.000000000 -0400
++++ speech_tools/speech_class/EST_wave_io.cc	2023-02-21 00:03:12.559352000 -0500
+@@ -230,5 +230,5 @@
+ 	data_length = length*(*num_channels);
+ 
+-    file_data = walloc(unsigned char,sample_width * data_length);
++    file_data = new unsigned char[sample_width * data_length];
+ 
+     ts.seek(current_pos+NIST_HDR_SIZE+(sample_width*offset*(*num_channels)));
+--- speech_tools/siod/slib_python.cc	2014-12-11 10:30:16.000000000 -0500
++++ speech_tools/siod/slib_python.cc	2023-02-21 00:07:42.577728000 -0500
+@@ -372,8 +372,4 @@
+   Py_Finalize();
+ }
+-#else   // No python support
+-
+-/* So there is a symbol in here even if there is no python support */
+-static int est_no_python_support = 1;
+ 
+ #endif  // EST_SIOD_ENABLE_PYTHON
+--- speech_tools/include/EST_Token.h	2004-09-29 04:24:17.000000000 -0400
++++ speech_tools/include/EST_Token.h	2023-02-21 00:23:22.647701000 -0500
+@@ -119,6 +119,4 @@
+     const EST_String &string() const { return String(); }
+     /// Access token as a string
+-    const EST_String &S() const { return S(); }
+-    /// Access token as a string
+     const EST_String &String() const { return pname; }
+     /// For automatic coercion to \Ref{EST_String}
+--- festival/src/modules/UniSyn/us_mapping.cc	2014-12-18 10:48:03.000000000 -0500
++++ festival/src/modules/UniSyn/us_mapping.cc	2023-02-21 00:47:56.907441000 -0500
+@@ -169,5 +169,5 @@
+ 	// find closest pitchmark (assume only need forward search from current
+ 	// point, since pitchmarks should always be increasing)
+-	while( (apm_i<=s_i_end) && (fabs((next_apm_t*m)-tpm_t) <= fabs((apm_t*m)-tpm_t)) ){
++	while ((apm_i < s_i_end - 1) && (fabs((next_apm_t*m)-tpm_t) <= fabs((apm_t*m)-tpm_t))) {
+ // 	  printf("(next_apm_t apm_t) %f %f\n", 
+ // 		 fabs((next_apm_t*m)-tpm_t), fabs((apm_t*m)-tpm_t) );