static char rcsid[] = "@(#)$Id: hdrencode.c,v 2.12 2016/05/01 09:25:08 hurtta Exp $";

/******************************************************************************
 *  The Elm (ME+) Mail System  -  $Revision: 2.12 $   $State: Exp $
 *
 *  Author: Kari Hurtta <hurtta+elm@siilo.FMI.FI> 
 *                  (was hurtta+elm@posti.FMI.FI, hurtta+elm@ozone.FMI.FI)
 *      or  Kari Hurtta <elm@elmme-mailer.org>
 *
 *  Partially based on mime_encode.c, which is initially 
 *     written by: Michael Elkins <elkins@aero.org>, 1995
 *****************************************************************************/

#include "elm_defs.h"
#include "s_me.h"

/* Debug handle for this file; the DPRINT() calls below pass it as &Debug.
   NOTE(review): "mime" presumably names the debug class -- confirm against
   the definition of DEBUG_VAR.
*/
DEBUG_VAR(Debug,__FILE__,"mime");

/* Upper-case hexadecimal digits indexed by nibble value (0 .. 15).
   The array has exactly 16 elements and is NOT NUL terminated.
*/
char hexchars[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};

/* View an unsigned char buffer as a plain char buffer.  The very same
   pointer is returned; nothing is copied or allocated.
*/
static char *us2s P_((unsigned char *str));
static char *us2s(str) 
     unsigned char *str;
{
    char * as_plain = (char *)str;

    return as_plain;
}


/* Special characters in comments and phrases also need to be encoded:
   \ " ( ) < > and so on
*/

/* Returns non-zero when ch is an ASCII digit, an ASCII letter or one of
   - + . , : /
   Space is deliberately not part of this set; hdr_tencode() treats it
   separately.
*/
static int tencode_literal_ok P_((int ch));
static int tencode_literal_ok(ch)
     int ch;
{
    if (ch >= '0' && ch <= '9')
	return 1;
    if (ch >= 'a' && ch <= 'z')
	return 1;
    if (ch >= 'A' && ch <= 'Z')
	return 1;

    switch (ch) {
    case '-':
    case '+':
    case '.':
    case ',':
    case ':':
    case '/':
	return 1;
    }

    return 0;
}

/* Build one "=?charset[*lang]?Q?text?=" word from the bytes of buffer.

   buffer      NUL terminated bytes to encode
   cs          charset name; NULL or a name containing any of
               space, tab, CR, LF ( ) " ? * ' =  is replaced with
	       "UNKNOWN-8BIT"
   Elen        out: encoded text length + charset name length + 8
               (+ language length + 1 when lang is used), or 0 when
	       nothing was produced
   lang        optional language tag; ignored if it contains any of
               the characters listed for cs
   structured  when non-zero, characters for which rfc822_special()
               is true are hex encoded too

   Space is written as '_', other characters outside of the literal
   set as =XX.  Returns a string built with strmcat() (callers in this
   file free() it), or NULL when buffer is empty.
*/
static char * hdr_tencode P_((char *buffer, const char *cs, int *Elen,
			      const char *lang, int structured));
static char * hdr_tencode(buffer,cs,Elen,lang,structured)
     char *buffer;
     const char *cs;
     int *Elen;
     const char *lang;
     int structured;
{
    static const char unsafe_in_name[] = " \t\r\n()\"?*'=";

    char * result = NULL;
    char * encoded;
    char * src;
    int escapes = 0;
    int cs_len;
    int lang_len = 0;
    int capacity;
    int out = 0;

    if (!cs || NULL != strpbrk(cs,unsafe_in_name))
	cs = "UNKNOWN-8BIT";
    cs_len = strlen(cs);

    if (lang && NULL != strpbrk(lang,unsafe_in_name))
	lang = NULL;
    if (lang)
	lang_len = strlen(lang);

    /* First pass: count the characters which become =XX */
    for (src = buffer; *src; src++) {
	if (' ' != *src && !tencode_literal_ok(*src))
	    escapes++;
	else if (structured && rfc822_special(*src))
	    escapes++;
    }
    capacity = (src - buffer) + 3 * escapes;
    encoded  = safe_malloc(capacity+1);

    /* Second pass: produce the encoded text */
    for (src = buffer; *src && out < capacity; src++) {

	if (' ' == *src) {
	    encoded[out++] = '_';
	    continue;
	}

	if (!tencode_literal_ok(*src) ||
	    (structured && rfc822_special(*src))) {
	    unsigned char byte = *src;

	    encoded[out++] = '=';
	    encoded[out++] = hexchars[byte >> 4];
	    encoded[out++] = hexchars[byte & 0x0F];
	    continue;
	}

	encoded[out++] = *src;

	/* Length (out+cs_len > 70) is not tested here -- 
	   the caller is responsible for splitting 
	*/
    }

    if (out <= 0) {
	/* Nothing to encode: no word is generated */
	*Elen = 0;
	free(encoded);
	return result;
    }

    encoded[out] = '\0';

    result = strmcat(result,"=?");
    result = strmcat(result,cs);
    if (lang) {
	result = strmcat(result,"*");
	result = strmcat(result,lang);
    }
    result = strmcat(result,"?Q?");
    result = strmcat(result,encoded);
    result = strmcat(result,"?=");

    *Elen = out + cs_len + 8;
    if (lang)
	*Elen += lang_len + 1;

    DPRINT(Debug,32,(&Debug, 
		     "hdr_tencode=%s (len=%d structured=%d)\n",
		     result,*Elen,structured));

    free(encoded);
    return result;
}

/* Encode buffer as a sequence of "=?charset?Q?...?=" words separated
   by single spaces.

   buffer           string to encode
   structured       passed on to hdr_tencode(); also makes the ascii
                    prefix scan stop on unicode_is_special(u,TOK_mail)
   do_ascii_prefix  when non-zero, a leading run of plain ASCII (for
                    example "Re: ") is copied unencoded in front of the
		    encoded words
   is_encoded       may be NULL; set to 1 when at least one encoded
                    word was generated (otherwise left untouched, so
		    the caller must initialize it)

   The string is split on struct string level and every piece is encoded
   with hdr_tencode().  When a piece gives Elen > 75, the piece length is
   reduced and the same piece is encoded again.

   Returns an allocated string (never NULL; "" if nothing was produced).
*/
static char * hdr_encode P_((const struct string *buffer,
			     int structured, int do_ascii_prefix,
			     int *is_encoded));
static char * hdr_encode(buffer,structured,do_ascii_prefix,is_encoded)
     const struct string *buffer;
     int structured;
     int do_ascii_prefix;
     int *is_encoded;
{
	
    const char * cname = get_string_MIME_name(buffer);
    const char * lang  = get_string_lang(buffer);
    
    int overhead;
    int splitlen;
    int blen = string_len(buffer);
    char * ret = NULL;
    int X;
    int need_sep = 0;

    /* This fallback must come before strlen(cname) below;
       otherwise a NULL name is dereferenced.
    */
    if (!cname)
	cname = "UNKNOWN-8BIT";

    /* Same fixed cost as hdr_tencode() adds to *Elen */
    overhead  = strlen(cname) + 8 + (lang ? strlen(lang) + 1 : 0);
    splitlen  = 75 - overhead;

    DPRINT(Debug,32,(&Debug, 
		     " (hdr_encode cname=%s, lang=%s, structured=%d)",
		     cname,lang ? lang : "<none>",structured));

    splitlen  -=   splitlen/8;    /* Rough estimate */

    if (splitlen < 1)        /* Should not happen .... */
	splitlen = 1;

    X = 0;

    /* Do not mime-encode Re: and similar prefix */
    if (do_ascii_prefix) {
	int last_ok = -1;       /* index of last space or tab seen */
	int lastch = '\0';      /* the separator found at last_ok  */
	int x;
	uint16 prev = 0x0000;

	/* Scan until the first character which can not be part of
	   the plain prefix: a control or non-ASCII character, a 
	   "=?" or "?=" pair, or (on structured fields) a special
	*/
	for (x = 0; x < blen; x++) {
	    uint16 u = give_unicode_from_string(buffer,x);

	    if (0x0009 /* HT  '\t' */ == u) {
		last_ok = x;
		lastch = '\t';
	    } else if (0x0020 /* SPACE  */ == u) {
		last_ok = x;
		lastch = ' ';
	    } else if (u < 0x0020 || u >= 0x007F) 
		break;
	    else if ((0x003D /* = */ == prev &&
		      0x003F /* ? */ == u)    ||
		     (0x003F /* ? */ == prev &&
		      0x003D /* = */ == u))
		break;
	    else if (structured && unicode_is_special(u,TOK_mail))
		break;
	    
	    prev =  u;
	}

	if (x >= blen) {   /* Whole string ascii */
	    last_ok = x;
	    lastch = '\0';
	}

	if (last_ok > 1) {
	    struct string * split = clip_from_string(buffer,&X,last_ok);
	    struct string *temp = ascify_string(split);
	    const char * MIME_name   = get_string_MIME_name(temp);

	    if (MIME_name && 0 == istrcmp(MIME_name,"US-ASCII")) {

		ret = us2s(stream_from_string(temp,0,NULL));

		/* Copy the separator itself and step over it */
		if (lastch) {
		    char fill[2];
		    
		    fill[0] = lastch;
		    fill[1] = '\0';
		    
		    ret = strmcat(ret,fill);
		    X++;
		}

		DPRINT(Debug,32,(&Debug, "ascii prefix=%s\n",ret));

	    } else
		X = 0;     /* Prefix not usable, encode from beginning */


	    free_string(&temp);
	    free_string(&split);
	}
	

    }


    while (X < blen) {
	struct string * split = NULL;
	char *tmp,*tmp2;
	int  oldX = X;
	int Elen;

	/* We are splitting on struct string and not on the result 
	   stream because we do not want to split in the middle of
	   an UTF-8 character or an ISO 2022 escape sequence
	*/

    restart:
	split = clip_from_string(buffer,&X,splitlen);
	tmp = us2s(stream_from_string(split,0,NULL));
	tmp2 = hdr_tencode(tmp,cname,&Elen,lang,structured);

	if (Elen > 75 && splitlen > 3) {
	    int rawlen = Elen - overhead;      /* Estimate */
	    int newlen;

	    /* ratio:   rawlen / splitlen  
	       target:  75 - overhead

	       new splitlen =  target / ratio
	    */

	    if (overhead < 70 && rawlen > 1)
		newlen = ( 75 - overhead ) * splitlen / rawlen;
	    else
		newlen = splitlen -2;

	    DPRINT(Debug,32,(&Debug, 
			     "hdr_encode restarting split_len=%d -> %d",
			     splitlen,newlen));


	    /* The piece length must really shrink on every retry */
	    if (newlen >= splitlen)
		newlen = splitlen -2;

	    splitlen = newlen;
	    if (splitlen < 1)
		splitlen = 1;

	    DPRINT(Debug,32,(&Debug, " (really %d)\n",
			     splitlen));

	    /* Throw this attempt away and clip the same piece again */
	    X = oldX;
	    free(tmp);
	    free(tmp2);
	    free_string(&split);
	    
	    goto restart;
	}

	/* No separator between the ascii prefix and the first word;
	   the prefix already carries its own space or tab 
	*/
	if (ret && need_sep)
	    ret = strmcat(ret," ");
	need_sep = 1;
	ret = strmcat(ret,tmp2);
	if (is_encoded)
	    *is_encoded = 1;

	free(tmp);
	free(tmp2);
	free_string(&split);	    
    }

    if (!ret)
	ret = safe_strdup("");
	   
    if (is_encoded) {
	DPRINT(Debug,32,(&Debug, " [is_encoded=%s]", 
			 *is_encoded ? "yes" :"no"));
    }

    return ret;
}

/* Copy str so that every backslash and parenthesis is preceded by
   a backslash.  Returns a newly allocated string (safe_malloc).
*/
static char * hdr_comment_quote P_((char *str));
static char * hdr_comment_quote(str)
     char *str;
{
    char * quoted;
    char * scan;
    int extra = 0;
    int size;
    int out = 0;

    /* Count the characters which need a backslash */
    for (scan = str; *scan; scan++) {
	switch (*scan) {
	case '\\':
	case '(':
	case ')':
	    extra++;
	    break;
	}
    }
    size = (scan - str) + extra;

    quoted = safe_malloc(size+1);

    for (scan = str; *scan && out < size; scan++) {
	switch (*scan) {
	case '\\':
	case '(':
	case ')':
	    quoted[out++] = '\\';
	    break;
	}
	quoted[out++] = *scan;
    }
    quoted[out] = '\0';

    return quoted;
}

static char * hdr_phrase P_((const struct string *buffer,
			     charset_t defcharset, int enmime,
			     int *is_encoded,
			     int do_ascii_prefix));
static char * hdr_phrase(buffer,defcharset,enmime, is_encoded,do_ascii_prefix)
     const struct string *buffer;
     charset_t defcharset; 
     int enmime;
     int *is_encoded;
     int do_ascii_prefix;
{
    char * ret = NULL;
    const char * lang   = get_string_lang(buffer);
    struct string *temp;
    const char * MIME_name;
    char * tmp;
    int bad = 0;
    int hi = 0;
    char * p1;

    const char * A UNUSED_VAROK = get_string_MIME_name(buffer);

    DPRINT(Debug,32,(&Debug," (hdr_phrase cs=%s lang=%s) ",
		     A ? A : "<none>",
		     lang ? lang : "<none>"));

    *is_encoded = 0;

    if (!enmime) 
	 temp = convert_string(defcharset,buffer,1);
    else 
	temp = ascify_string(buffer);
    MIME_name = get_string_MIME_name(temp);

    tmp = us2s(stream_from_string(temp,0,NULL));

    for (p1 = tmp; *p1; p1++) {
	if ((*p1 < '0' || *p1 > '9') &&
	    (*p1 < 'a' || *p1 > 'z') &&
	    (*p1 < 'A' || *p1 > 'Z') &&
	    '-' != *p1 && ' ' != *p1)
	    bad++;
	if (*p1 & 128)
	    hi++;
    }
    
    if (!bad && ( !enmime 
		  || 
		  (
		   !lang &&
		   ( (MIME_name && 0 == istrcmp(MIME_name,"US-ASCII")) 
		     ||
		     charset_ok_p(get_string_type(temp))
		     )
		   )
		  )
	)
	ret = strmcat(ret,tmp);
    else if (!enmime || 
	     ( 
	      !lang &&
	      (!hi && ((MIME_name && 0 == istrcmp(MIME_name,"US-ASCII")) 
		       ||
		       charset_ok_p(get_string_type(temp)))
	       ))
	     ) {
	int l = (p1-tmp) + bad + 2;
	int i = 0;
	char * work = safe_malloc(l+1);

	work[i++] = '"';
	for (p1 = tmp; *p1 && i < l-1; p1++) {
	    if ('\\' == *p1 || '"' == *p1)
		work[i++] = '\\';
	    work[i++] = *p1;
	}
	work[i++] = '"';
	work[i] = '\0';
	ret = strmcat(ret,work);
	free(work);
	
	*is_encoded = -1;

    } else {
	/* We do not use hdr_tencode() and tmp here because
	   charset may be UTF-8 charset or some ISO 2022 variant 
	   and we need do splitting for them also correctly
	*/
	char * work = hdr_encode(temp,1,do_ascii_prefix && !lang && !bad, 
				 is_encoded);

	ret = strmcat(ret,work);
	free(work);
    }
		 
    free(tmp);
    free_string(&temp);
    return ret;
}

/* Convert buffer to a form usable inside a ( comment ) on a header
   field.

   buffer           comment text to convert
   defcharset       target charset for convert_string() when enmime
                    is zero
   enmime           non-zero: mime encoding may be used
   is_encoded       out (must not be NULL): set to 0 here; hdr_encode()
                    sets it to 1 when encoded words are generated
   do_ascii_prefix  passed to hdr_encode() when string has no language

   Without enmime the converted text is just backslash quoted with
   hdr_comment_quote().  With enmime the same is done only when the
   text has no language, is US-ASCII and does not look like it
   includes an encoded word; otherwise hdr_encode() is used.

   Returns an allocated string.
*/
static char * hdr_comment P_((const struct string *buffer,
			      charset_t defcharset, int enmime,
			      int *is_encoded,int do_ascii_prefix));
static char * hdr_comment(buffer,defcharset,enmime, is_encoded,do_ascii_prefix)
     const struct string *buffer;
     charset_t defcharset; 
     int enmime;
     int *is_encoded;
     int do_ascii_prefix;
{
    char * ret;
    const char * lang   = get_string_lang(buffer);
    const char * A   UNUSED_VAROK = get_string_MIME_name(buffer);

    DPRINT(Debug,32,(&Debug," (hdr_comment cs=%s lang=%s) ",
		     A ? A : "<none>",
		     lang ? lang : "<none>"));

    *is_encoded = 0;

    if (!enmime) {
	struct string *temp = convert_string(defcharset,buffer,1);
	char *tmp = us2s(stream_from_string(temp,0,NULL));

	ret = hdr_comment_quote(tmp);

	free_string(&temp);
	free(tmp);
    } else {
	struct string *temp = ascify_string(buffer);
	const char * MIME_name   = get_string_MIME_name(temp);

	ret = NULL;
	
	if (!lang &&
	    MIME_name && 0 == istrcmp(MIME_name,"US-ASCII")) {

	    char *tmp = us2s(stream_from_string(temp,0,NULL));
	    char *start = strstr(tmp,"=?");

	    /* "?=" must be searched after the first "=?".  Comparing
	       with the first "?=" of the whole string misses text
	       like  a?= =?x?Q?y?=  where an unrelated "?=" comes 
	       first.
	    */
	    if (start && NULL != strstr(start+1,"?=")) {
		/* Looks like an encoded word -- ret is left NULL 
		   so that the text is mime encoded below 
		*/
	    } else
		ret = hdr_comment_quote(tmp);
	    free(tmp);
	}

	if (!ret) {
	    ret = hdr_encode(temp,1,do_ascii_prefix && !lang,is_encoded);
	}

	free_string(&temp);
    }
    return ret;
}

/* Convert buffer to a form usable as unstructured header text.

   buffer           text to convert
   defcharset       target charset for convert_string() when enmime
                    is zero
   enmime           non-zero: mime encoding may be used
   is_encoded       out (must not be NULL): set to 0 here; hdr_encode()
                    sets it to 1 when encoded words are generated
   do_ascii_prefix  passed to hdr_encode() when string has no language

   Without enmime the converted text is returned unchanged.  With
   enmime the text is returned unchanged only when it has no language,
   is US-ASCII and does not look like it includes an encoded word; 
   otherwise hdr_encode() is used.

   Returns an allocated string.
*/
static char * hdr_text P_((const struct string *buffer,
			   charset_t defcharset, int enmime,
			   int *is_encoded,int do_ascii_prefix));
static char * hdr_text(buffer,defcharset,enmime,is_encoded,do_ascii_prefix)
     const struct string *buffer;
     charset_t defcharset; 
     int enmime;
     int *is_encoded;
     int do_ascii_prefix;
{
    char * ret;
    const char * lang   = get_string_lang(buffer);
    const char * A UNUSED_VAROK  = get_string_MIME_name(buffer);

    DPRINT(Debug,32,(&Debug," (hdr_text cs=%s lang=%s) ",
		     A ? A : "<none>",
		     lang ? lang : "<none>"));

    *is_encoded = 0;

    if (!enmime) {
	struct string *temp = convert_string(defcharset,buffer,1);
	ret = us2s(stream_from_string(temp,0,NULL));
	free_string(&temp);
    } else {
	struct string *temp = ascify_string(buffer);
	const char * MIME_name  = get_string_MIME_name(temp);
	ret = NULL;

	if (!lang &&
	    MIME_name && 0 == istrcmp(MIME_name,"US-ASCII")) {
	    char *start;

	    ret = us2s(stream_from_string(temp,0,NULL));

	    /* "?=" must be searched after the first "=?".  Comparing
	       with the first "?=" of the whole string misses text
	       like  a?= =?x?Q?y?=  where an unrelated "?=" comes 
	       first.
	    */
	    start = strstr(ret,"=?");
	    if (start && NULL != strstr(start+1,"?=")) {
		/* Looks like an encoded word; mime encode it below */
		free(ret);
		ret = NULL;
	    }
	}

	if (!ret) {
	    ret = hdr_encode(temp,0,do_ascii_prefix && !lang,is_encoded);

	}

	free_string(&temp);
    }
    return ret;
}

/*    *is_encoded = 1  ... mime encoded
      *is_encoded = -1     quoted phrase
 */


/* class is one of HDR_PHRASE, HDR_COMMENT, HDR_TEXT */
char * string_to_hdr(class,buffer,defcharset,enmime,is_encoded,do_ascii_prefix)
     int class;
     const struct string *buffer;
     charset_t defcharset; 
     int enmime;
     int *is_encoded;
     int do_ascii_prefix;
{
    char * ret = NULL;
    const char * MIME_name UNUSED_VAROK = get_charset_MIME_name(defcharset);
    int IS_ENCODED = 0;

    switch(class) {
    case HDR_PHRASE:
	ret = hdr_phrase(buffer,defcharset,enmime,&IS_ENCODED,do_ascii_prefix);
	break;
    case HDR_COMMENT:
	ret = hdr_comment(buffer,defcharset,enmime,&IS_ENCODED,do_ascii_prefix);
	break;
    case HDR_TEXT:
	ret = hdr_text(buffer,defcharset,enmime,&IS_ENCODED,do_ascii_prefix);
	break;
    }

    DPRINT(Debug,30,(&Debug, 
		     "string_to_hdr=%s  (class=%d, buffer=%p, defcharset=%p '%s', enmime=%d, do_ascii_prefix=%d)  .. encoded=%d\n",
		     ret,
		     class,buffer,
		     defcharset,
		     MIME_name ? MIME_name : "<none>",
		     enmime,
		     do_ascii_prefix,
		     IS_ENCODED));    

    if (is_encoded)
	*is_encoded = IS_ENCODED;

    return ret;
}

/*
 * Local Variables:
 *  mode:c
 *  c-basic-offset:4
 *  buffer-file-coding-system: iso-8859-1
 * End:
 */
