static char rcsid[] = "@(#)$Id: stringtok.c,v 2.9 2022/07/18 15:40:44 hurtta Exp $";

/******************************************************************************
 *  The Elm (ME+) Mail System  -  $Revision: 2.9 $   $State: Exp $
 *
 *  Author: Kari Hurtta <hurtta+elm@siilo.FMI.FI> 
 *                  (was hurtta+elm@posti.FMI.FI, hurtta+elm@ozone.FMI.FI)
 *      or  Kari Hurtta <elm@elmme-mailer.org>
 *****************************************************************************/

#include "elm_defs.h"

/* 
   string_tokenize()
        should do for 'struct string' the same as
   rfc822_tokenize()
        does for 'char *' (getaddr.c)

   however, it does not produce exactly the same result
*/

/* Declares 'Debug', which is passed to the DPRINT() calls of this file */
DEBUG_VAR(Debug,__FILE__,"addr");

#define  unicode_backlash    0x005C  /* '\\' -- unicode value of backslash */

/* Delimiter pairs which string_tokenize() collects to one token.
   'start' is the unicode value which opens the token and
   'end' is the unicode value which closes it. For the double
   quote both values are the same. 
*/
static struct pairs {
    uint16 start;     /* opening character */
    uint16 end;       /* closing character */
} pairs[] = {
  { 0x0022  /* " */,  0x0022  /* " */ },
  { 0x0028  /* ( */,  0x0029  /* ) */ },
  { 0x005B  /* [ */,  0x005D  /* ] */ }
};


/* Tells whether unicode value 'x' is a "special" character 
   for the tokenizer.

   Some characters are special always, '.' is special only 
   when TOK_mail is set on 'flags', and '=', '/' and '?' are 
   special only when TOK_mime is set on 'flags'.

   Returns 'x' itself when it is special, otherwise 0.
*/
int unicode_is_special(x,flags)
     int x;
     int flags;
{
    int found = 0;

    switch (x) {

	/* Always special */
    case 0x0022    /* '"' */ : 
    case 0x0028    /* '(' */ :
    case 0x0029    /* ')' */ : 
    case 0x002C    /* ',' */ : 
    case 0x003A    /* ':' */ :
    case 0x003B    /* ';' */ : 
    case 0x003C    /* '<' */ : 
    case 0x003E    /* '>' */ :
    case 0x0040    /* '@' */ : 
    case 0x005B    /* '[' */ :
    case 0x005C    /* '\\'*/ : 
    case 0x005D    /* ']' */ :
	found = 1;
	break;

	/* Special only on mail mode */
    case 0x002E    /* '.' */ : 
	if (flags & TOK_mail)
	    found = 1;
	break;

	/* Special only on MIME mode */
    case 0x002F    /* '/' */ :
    case 0x003D    /* '=' */ :
    case 0x003F    /* '?' */ :
	if (flags & TOK_mime)
	    found = 1;
	break;

    default:
	break;
    }

    if (found)
	return x;
    
    return 0;
}

/* "space" for tokenizer: SPACE, HT, LF and CR only.
   Returns 'x' itself when it is one of these, otherwise 0.
*/
int unicode_is_space(x)
     int x;
{
    /* Unicode EOLN ??? */
    const int is_blank = 
	(0x0020    /* SPACE */    == x ||
	 0x0009    /* HT  '\t' */ == x ||
	 0x000A    /* LF  '\n' */ == x ||
	 0x000D    /* CR  '\r' */ == x);

    return is_blank ? x : 0;
}

/* Number of entries on pairs[] */
#define NUM_pair ( sizeof pairs / sizeof (pairs[0]))

/* Splits 'line' to an array of tokens. 'flags' is passed to
   unicode_is_special() and selects which characters are 
   treated as special.

   Every character of 'line' ends up in some token. A token is 
   one of:

     - text delimited by one of pairs[]  ( "..."  (...)  [...] ), 
       delimiters included; .special is the opening character.
       Backslash inside skips the next character. If the closing
       character is not found, .status is token_icomplete
     - run of unicode_is_space() characters; .special is 0x0020
     - run of characters which are neither space nor special; 
       .special is 0
     - one special character; .special is that character. For
       backslash up to two characters are clipped to the token.

   Return value is an allocated array. The last element of the 
   array has .token == NULL, .special == 0 and 
   .status == token_fail. The array is returned also when 'line' 
   is empty (then it includes only that last element). 
   free_string_tokenized() frees the array and the tokens.
*/
struct string_token * string_tokenize(line,flags)
     const struct string *line;
     int flags;
{
    struct string_token * result = NULL;
    int    result_len = 0;

    const int linelen = string_len(line);

    int i;

    DPRINT(Debug,25,(&Debug, 
		     "string_tokenize: line=%S\n",
		     line));

    for (i = 0; i < linelen; ) {

	const uint16 code = give_unicode_from_string(line,i);

	struct pairs   * mode  = NULL;
	struct string  * token = NULL;
	int              special = 0;
	enum token_status status = token_parsed;
	int j;

	/* Does this character open quoted string, comment
	   or domain literal? 
	*/
	for (j = 0; j < (int) NUM_pair; j++) {
	    if (code == pairs[j].start)
		mode = &pairs[j];
	}

	/* Look quoted string */
	if (mode) {
	    int start = i;
	    int len;
	    int depth = 1;

	    special = mode->start;

	    for (i++; i < linelen; i++) {
		const uint16 c1 = give_unicode_from_string(line,i);

		/* For '"' start and end are the same character,
		   so end must be tested before start 
		*/
		if (unicode_backlash == c1)
		    i++;  /* skip next */
		else if (mode->end == c1)
		    depth--;
		else if (mode->start == c1)
		    depth++;
		if (0 == depth) {
		    i++;   /* closing character belongs to token */
		    break;
		}
	    }

	    /* Backslash as the last character of line causes
	       that loop steps one position past the end of 
	       line. Do not count a character which does not
	       exist.
	    */
	    if (i > linelen)
		i = linelen;

	    len = i - start;

	    token = clip_from_string(line,&start,len);

	    if (depth > 0)
		status = token_icomplete;
	    
	    goto add_token;
	}

	/* Collect run of spaces to one token */
	if (unicode_is_space(code)) {
	    int start = i;
	    int len;
	    special = 0x0020 /* SPACE  */;

	    for (i++; i < linelen; i++) {
		const uint16 c2 = give_unicode_from_string(line,i);

		if (!unicode_is_space(c2))
		    break;
	    }
	    len = i - start;
	    
	    token = clip_from_string(line,&start,len);
	    
	    goto add_token;       
	}

	/* Ordinary characters: collect until next space 
	   or special character 
	*/
	special = unicode_is_special(code,flags);
	if (!special) {
	    int start = i;
	    int len;

	    for (i++; i < linelen; i++) {
		const uint16 c3 = give_unicode_from_string(line,i);

		if (unicode_is_space(c3))
		    break;
		if (unicode_is_special(c3,flags))
		    break;
	    }
	    len = i - start;

	    token = clip_from_string(line,&start,len);

	    goto add_token;
	}
	         
	/* Is special -- here position 'i' is given directly to 
	   clip_from_string(), which is relied on to advance it 
	*/
	if (unicode_backlash == code) {
	    token = clip_from_string(line,&i,2);
	} else
	    token = clip_from_string(line,&i,1);

    add_token:

	DPRINT(Debug,25,(&Debug, 
			 "string_tokenize: [%d] special=%04d token=%S\n",
			 result_len,special,token));

	/* One extra element is reserved for the end marker, which
	   is rewritten every time when the array grows 
	*/
	result = safe_array_realloc(result, (result_len+2), sizeof (result[0]));
	result[result_len].special = special;
	result[result_len].token   = token;
	result[result_len].status  = status;
	result[result_len+1].special = 0;
	result[result_len+1].token   = NULL;
	result[result_len+1].status  = token_fail;
	result_len++;
    }

    /* Empty line: return array which have only the end marker */
    if (!result) {
	result = safe_realloc(result, sizeof (result[0]));
	result[0].special = 0;
	result[0].token   = NULL;
	result[0].status  = token_fail;
    }

    DPRINT(Debug,25,(&Debug, 
		     "string_tokenize=%p  result len=%d\n",
		     result,result_len));

    return result;
}

/* Frees token array *ptr: every token up to the first element 
   with .token == NULL is freed with free_string(), then the 
   array itself is freed and *ptr is set to NULL.

   Does nothing if 'ptr' or '*ptr' is NULL, so this is safe
   to call for an array which is already freed via same pointer.
*/
void free_string_tokenized(ptr)
     struct string_token **ptr;
{
    struct string_token *res;
    int i;

    if (!ptr || !*ptr)
	return;

    res = *ptr;

    for (i = 0; res[i].token; i++) {
	free_string(& res[i].token);
    }

    free(res);

    *ptr = NULL;
}

/* Tells whether 'value' must be quoted.

   Returns 1 if 'value' is empty or if it includes any of
     - characters  <= 0x0020 (space)
     - characters  0x0022 (") - 0x002F (/)
     - characters  0x003A (:) - 0x003F (?)
     - UNICODE_BAD_CHAR
     - character which is special on either mail or MIME mode
     - character which unicode_is_space() or
       unicode_ch(,UOP_space) reports

   Otherwise returns 0.
*/
int string_need_quote(value)
     const struct string * value;
{
    const int count = string_len(value);
    int pos;
    
    if (count < 1)
	return 1;

    for (pos = 0; pos < count; pos++) {
	const uint16 ch = give_unicode_from_string(value,pos);

	const int below_or_space = ch <= 0x0020  /* space */;
	const int low_punct      = ch >= 0x0022  /*  " */ &&
	                           ch <= 0x002F  /*  / */;
	const int high_punct     = ch >= 0x003A  /*  : */ &&
	                           ch <= 0x003F  /*  ? */;

	if (below_or_space || low_punct || high_punct)
	    return 1;

	/* So that can be used both MIME and mail address content ... */

	if (UNICODE_BAD_CHAR == ch)
	    return 1;
	if (unicode_is_special(ch,TOK_mail|TOK_mime))
	    return 1;
	if (unicode_is_space(ch))
	    return 1;
	if (unicode_ch(ch,UOP_space))
	    return 1;
    }

    return 0;
}


/*
 * Local Variables:
 *  mode:c
 *  c-basic-offset:4
 *  buffer-file-coding-system: iso-8859-1
 * End:
 */
