/* This file is part of the KDE libraries
   Copyright (C) 1999 Ian Zepp (icszepp@islc.net)

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/

#include "kstringhandler.h"

/*
 * Heuristically decide whether a NUL-terminated byte string is UTF-8 text.
 *
 * buf: the bytes to inspect, terminated by a 0 byte; may be null.
 *
 * Returns true when
 *   - buf is null (kept from the original so callers never crash), or
 *   - every byte below 0x80 is a plain-text ASCII character, every
 *     multi-byte sequence is structurally well formed, and at least one
 *     complete multi-byte sequence was seen.
 * Returns false otherwise; in particular a string consisting only of
 * 7-bit characters is NOT reported as UTF-8.
 *
 * Only the bit structure of sequences is checked (lead byte followed by
 * the right number of 10xxxxxx bytes); decoded code point values are not
 * examined.
 */
bool KStringHandler::isUtf8(const char *buf)
{
    if (!buf)
        return true;		// whatever, just don't crash

    const unsigned char F = 0;	/* character never appears in text */
    const unsigned char T = 1;	/* character appears in plain ASCII text */
    const unsigned char I = 2;	/* character appears in ISO-8859 text */
    const unsigned char X = 3;	/* character appears in non-ISO extended ASCII (Mac, IBM PC) */

    /*
     * Classification of every byte value.  This function only consults
     * the table for bytes below 0x80, so the upper half is never read here.
     */
    static const unsigned char text_chars[256] = {
        /*                  BEL BS HT LF    FF CR    */
        F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,	/* 0x0X */
        /*                              ESC          */
        F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,	/* 0x1X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,	/* 0x2X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,	/* 0x3X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,	/* 0x4X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,	/* 0x5X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,	/* 0x6X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,	/* 0x7X */
        /*            NEL                            */
        X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,	/* 0x8X */
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,	/* 0x9X */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,	/* 0xaX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,	/* 0xbX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,	/* 0xcX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,	/* 0xdX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,	/* 0xeX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I	/* 0xfX */
    };

    /*
     * Walk the bytes as unsigned values through a pointer: this avoids
     * sign-extension when masking and removes the signed int index, which
     * could overflow on extremely long inputs.
     */
    const unsigned char *p = reinterpret_cast<const unsigned char *>(buf);
    bool gotone = false;	/* set once a complete multi-byte sequence is seen */

    for (; *p; ++p) {
        const unsigned char c = *p;

        if ((c & 0x80) == 0) {	/* 0xxxxxxx is plain ASCII */
            /*
             * Even if the whole string is valid UTF-8 sequences,
             * still reject it if it uses weird control characters.
             */
            if (text_chars[c] != T)
                return false;
            continue;
        }

        if ((c & 0x40) == 0)	/* 10xxxxxx never 1st byte */
            return false;

        /* 11xxxxxx begins a sequence; count the continuation bytes owed. */
        int following;
        if ((c & 0x20) == 0) {		/* 110xxxxx */
            following = 1;
        } else if ((c & 0x10) == 0) {	/* 1110xxxx */
            following = 2;
        } else if ((c & 0x08) == 0) {	/* 11110xxx */
            following = 3;
        } else if ((c & 0x04) == 0) {	/* 111110xx */
            /*
             * NOTE(review): 5- and 6-byte forms are accepted here exactly
             * as in the original code; confirm whether they should still be.
             */
            following = 4;
        } else if ((c & 0x02) == 0) {	/* 1111110x */
            following = 5;
        } else {
            return false;		/* 0xFE / 0xFF are never valid lead bytes */
        }

        for (int n = 0; n < following; ++n) {
            ++p;
            /*
             * The string ended in the middle of a sequence.  The original
             * behavior is preserved: stop scanning and answer based on
             * what was seen so far rather than rejecting outright.
             * NOTE(review): presumably tolerates truncated buffers; verify
             * against callers before tightening.
             */
            if (*p == 0)
                return gotone;

            if ((*p & 0xC0) != 0x80)	/* continuation must be 10xxxxxx */
                return false;
        }
        gotone = true;
    }

    return gotone;		/* don't claim it's UTF-8 if it's all 7-bit */
}

