static char rcsid[] = "@(#)$Id: unicode.c,v 2.9 2023/12/13 16:55:32 hurtta Exp $";

/******************************************************************************
 *  The Elm (ME+) Mail System  -  $Revision: 2.9 $   $State: Exp $
 *
 *  Author: Kari Hurtta <hurtta+elm@siilo.FMI.FI> 
 *                  (was hurtta+elm@posti.FMI.FI, hurtta+elm@ozone.FMI.FI)
 *      or  Kari Hurtta <elm@elmme-mailer.org>
 *****************************************************************************/

#include "elm_defs.h"
#include "s_me.h"
#include "cs_imp.h"
#include "unidata.h"

DEBUG_VAR(Debug,__FILE__,"charset");

/* NOTE:

              UOP_printable   returns 0 if character is not known to be
                              printable

              UOP_noctrl      returns 0 if character is known to be
                              control character

              UOP_space       returns 0 if character is not known or
                              is not space
*/

/* unicode_ch: check or convert one character value.
 *
 *   ch    character value. Only values 0x0000 - 0xFFFF can be
 *         returned as uint16; bigger values are rejected (result 0).
 *
 *   op    UOP_none        no check is done for the character
 *         UOP_lowercase   character is replaced with its lowercase
 *                         mapping when one is known
 *         UOP_noctrl      result is 0 if character is known to be
 *                         control character (or is not assigned)
 *         UOP_printable   result is 0 if character is not known to be
 *                         printable
 *         UOP_space       result is 0 if character is not known to be
 *                         space
 *
 * Returns 0 if char not OK, otherwise char or converted char.
 *
 * Character information is looked up with unicode_lookup_character()
 * from the data given by default_unidata().  If there is no unidata,
 * or the lookup reports a bad database (negative result), built-in
 * ASCII / Latin1 rules are used instead.
 */
uint16 unicode_ch(ch,op)
     unsigned int ch; 
     enum unicode_op op; 
{
    struct unidata_mapped_data * XX = default_unidata();
    struct character_information info;
    uint16 dummy[1];
    int Y;

    /* Tell only once (per case) on the debug output which kind of
       data is available */
    if (XX) {
	static int flip = 0;

	if (!flip) {
	    flip = 1;
	    DPRINT(Debug,4,(&Debug,
			    "unicode_ch called -- got unidata\n"));
	}
    } else {
	static int flip = 0;

	if (!flip) {
	    flip = 1;
	    DPRINT(Debug,4,(&Debug,
			    "unicode_ch called -- no unidata\n"));
	}
    }

    /* Result type is uint16. Value which does not fit to 16 bits
       would be silently truncated to some other character on return,
       so it is never OK, whatever the operation is.
    */
    if (ch > 0xFFFF) {
	DPRINT(Debug,61,(&Debug,
			 "unicode_ch: %04X does not fit to 16 bits\n",
			 ch));
	return 0;
    }

    if (!XX)
	goto fail;

    /* bzero is defined on hdrs/defs.h */
    bzero((void *)&info, sizeof info);

    info.character_type = 0;

    Y = unicode_lookup_character(XX,ch,&info,dummy,0);

    if (Y < 0)  {       /* database bad */
	DPRINT(Debug,61,(&Debug,
			 "unicode_ch: failed to look at %04X -- database bad\n",
			 ch));
	goto fail;
    }

    if (Y == 0) {       /* failure (not valid character) */
	DPRINT(Debug,61,(&Debug,
			 "unicode_ch: failed to look at %04X -- not a valid character or control character\n",
			 ch));
	if (!info.character_type) {
	    DPRINT(Debug,61,(&Debug,
			     "unicode_ch-No character type information for %04X\n",
			     ch));
	    goto bad;
	}
	/* Character type is available, so it can be examined below */
    }

    switch(op) {

    case UOP_none: 
	break;

    case UOP_lowercase:
	if (info.lower)
	    ch = info.lower;
	break;

    case UOP_noctrl: 
	/* Other, Control  or  Other, Not Assigned */
	if (info.character_type == CHARTYPE_Cc ||
	    info.character_type == CHARTYPE_Cn)
	    ch = 0;
	break;

    case UOP_printable:
	/* Number, Separator Space, Letter, Punctuation and Symbol
	   are printable; everything else is not */
	if (! ((info.character_type & CHARTYPE_Number)       ||
	       info.character_type == CHARTYPE_Zs            ||
	       (info.character_type & CHARTYPE_Letter)       ||
	       (info.character_type & CHARTYPE_Punctuation)  ||
	       (info.character_type & CHARTYPE_Symbol)))
	    ch = 0;
	break;

    case UOP_space:
	/* Separator, Space        */
	if (info.character_type != CHARTYPE_Zs)
	    ch = 0;
	break;

    default:
	break;
    }

    return ch;

 bad:
    /* Database have no character type information for the character */

    switch(op) {
    case UOP_printable:     /* Invalid character is not printable */
    case UOP_noctrl:        /* Consider invalid character as control */
    case UOP_space:         /* Invalid character is not space */
	ch = 0;
	break;
    default:
	break;
    }

    return ch;

 fail:
    /* No usable database: only ASCII and Latin1 are known */

    switch(op) {
    case UOP_none: 
	break;

    case UOP_lowercase:
	if (ch >= 0x0041 && ch <= 0x005A)               /* ASCII range  */
	    ch = ( ch - 0x0041) + 0x0061;
	else if (ch >= 0x00C0 && ch <= 0x00D6)          /* LATIN1 range */
	    ch = ( ch - 0x00C0) + 0x00E0;
	else if (ch >= 0x00D8 && ch <= 0x00DE)          /* LATIN1 range */
	    ch = ( ch - 0x00D8) + 0x00F8;
	break;

    case UOP_noctrl: 
	if (ch <= 0x001F ||                       /* ASCII ctrl range */
	    (ch >= 0x007F && ch <= 0x009F))
	    ch = 0;
	break;

    case UOP_printable:
	if (! ((ch >= 0x0020 && ch <= 0x007E) ||         /* ASCII range */
	       (ch >= 0x00A0 && ch <= 0x00FF)))         /* Latin1 range */
	    ch = 0;                          /* Not known if printable */
	break;

    case UOP_space:
	if (ch != 0x0020 &&                             /* Ascii space */
	    ch != 0x00A0)                        /* non breaking space */
	    ch = 0;                          /* Not known if space */
	break;

    default:
	break;
    }

    return ch;
}

/* compress_unicode: run unicode_compress_input() over words[0 .. *len-1]
 * and store the result back to words[], updating *len to the new
 * word count.
 *
 * For every position unicode_compress_input() produces one output
 * word and tells how many input words it consumed.  If it fails
 * (result < 1) the input word is copied as is.  Result whose type is
 * not DECOMP_canonical is discarded and the input word is copied
 * instead.
 *
 * Nothing is done (and 0 is returned) if there is no unidata or
 * *len is not positive.
 *
 * Returns 1 if compressed (at least one DECOMP_canonical result was
 * accepted), otherwise 0.
 */
int compress_unicode(words,len)
     uint16  *words; 
     int *len;
{
    struct unidata_mapped_data * unidata = default_unidata();
    uint16 * packed;
    int total;
    int src     = 0;     /* next unread position on words[]  */
    int dst     = 0;     /* next free position on packed[]   */
    int changed = 0;
    int k;

    if (!unidata || *len <= 0)
	return 0;

    total  = *len;
    packed = safe_calloc(total, sizeof (uint16));

    while (src < total && dst < total) {
	int kind;
	int used = unicode_compress_input(unidata,
					  packed + dst,
					  words + src,
					  total - src,
					  &kind);

	if (used < 1) {
	    DPRINT(Debug,61,(&Debug,
			    "unicode compress failes at [%d] %04X, ret=%d\n",
			    src,words[src],used));

	    /* Nothing usable: pass the character through */
	    packed[dst] = words[src];
	    dst += 1;
	    src += 1;
	    continue;
	}

	if (DECOMP_canonical == kind) {
	    changed = 1;
	} else {
	    DPRINT(Debug,62,(&Debug,
			     "unicode compress type %d discarded:",
			     kind));
	    for (k = 0; k < used; k++) {
		DPRINT(Debug,62,(&Debug," %04X",
				 words[src+k]));
	    }
	    DPRINT(Debug,62,(&Debug," => %04X\n",
			     packed[dst]));

	    /* Only canonical results are kept; restore the original
	       word and consume just that one */
	    packed[dst] = words[src];
	    used = 1;
	}

	/* Trace only when something was really changed */
	if (used != 1 || packed[dst] != words[src]) {
	    DPRINT(Debug,62,(&Debug,
                                "unicode compress:"));
	    for (k = 0; k < used; k++) {
		DPRINT(Debug,62,(&Debug," %04X",
				 words[src+k]));
	    }
	    DPRINT(Debug,62,(&Debug," => %04X\n",
			     packed[dst]));
	}

	dst += 1;
	src += used;       /* that many characters consumed */
    }

    DPRINT(Debug,61,(&Debug,
		     "unicode compress  len %d => %d (%s)\n",
		     total,dst,
		     changed ? "processed" : "no compression"));

    /* Give result back on caller's buffer */
    for (k = 0; k < dst; k++) {
	words[k] = packed[k];
    }
    *len = dst;

    free(packed);

    return changed;
}


/* ---------------------------------------------------------------------- */

/* default_unidata: give the UNIDATA information to be used, loading
 * it with get_unidata() on first need.
 *
 * If unidata_path is not empty, data of that file is returned
 * (value "none" disables loading).  If unidata_path_global names the
 * same file and is already loaded, that data is shared instead of
 * loading the file again.
 *
 * If unidata_path is still empty, data of unidata_path_global is
 * returned (empty value or "none" disables loading).
 *
 * Loading of each file is tried only once; result is kept on static
 * variables.  Returns NULL if there is no data available.
 */
struct unidata_mapped_data * default_unidata()
{
    static struct unidata_mapped_data * user_data   = NULL;
    static struct unidata_mapped_data * global_data = NULL;
    static int user_tried   = 0;
    static int global_tried = 0;

    if (unidata_path[0]) {

	if (!user_data && 0 != strcmp(unidata_path,"none")) {

	    if (global_data &&
		0 == strcmp(unidata_path,unidata_path_global)) {

		DPRINT(Debug,4,(&Debug,
				"Using global as user UNIDATA  ... (file %s)\n",
				unidata_path));

		user_data = global_data;

	    } else if (!user_tried) {

		/* Flag must be set before get_unidata() is called:
		   get_unidata() may cause error message to be
		   printed and error message may cause
		   default_unidata() to be called again.
		*/
		user_tried = 1;

		DPRINT(Debug,4,(&Debug,
				"Loading UNIDATA information ... (file %s)\n",
				unidata_path));

		if (!get_unidata(&user_data,unidata_path)) {
		    DPRINT(Debug,4,(&Debug,
				    "... loading of UNIDATA information failed\n"));
		}
	    }
	}

	if (!user_data) {
	    DPRINT(Debug,7,(&Debug,"default_unidata=NULL:  unidata_path=%s\n",unidata_path));
	}

	return user_data;
    }

    /* unidata_path is empty -- only global data can be used.
       This debug line is completed by one of the messages below. */

    DPRINT(Debug,4,(&Debug,
		    "UNIDATA path not expanded yet"));

    if (!global_data && !global_tried &&
	unidata_path_global[0] && 
	0 != strcmp(unidata_path_global,"none")) {

	/* Flag must be set before get_unidata() is called:
	   get_unidata() may cause error message to be printed
	   and error message may cause default_unidata() to be
	   called again.
	*/
	global_tried = 1;

	DPRINT(Debug,4,(&Debug,
			" ... Loading (global) UNIDATA information ... (file %s)\n",
			unidata_path_global));

	if (!get_unidata(&global_data,unidata_path_global)) {
	    DPRINT(Debug,4,(&Debug,
			    "... loading of UNIDATA information failed\n"));
	} 
    }

    if (!global_data) {
	DPRINT(Debug,4,(&Debug,"\n"));

	DPRINT(Debug,7,(&Debug,"default_unidata=NULL:  unidata_path_global=%s\n",
			unidata_path_global));
    } else {
	DPRINT(Debug,4,(&Debug,
			" ... using global.\n"));
    }

    return global_data;
}

/*
 * Local Variables:
 *  mode:c
 *  c-basic-offset:4
 *  buffer-file-coding-system: iso-8859-1
 * End:
 */
