//
// PyUnicode.cpp -- Unicode string type for Python
//
// @doc

#include "windows.h"
#include "Python.h"
#include "PyWinTypes.h"
#include "PyWinObjects.h"
#include "malloc.h"
#include "wchar.h"

// Create a new PyUnicode object from a NUL-terminated 8-bit string.
// The conversion itself is performed by the PyUnicode(const char *) constructor.
PyObject *PyUnicodeObject_FromString(const char *string)
{
	PyUnicode *unicodeOb = new PyUnicode(string);
	return unicodeOb;
}
// String conversions
// Convert a Python string object to a BSTR - allow embedded NULLs, etc.
//
// stringObject - a Python (8-bit) string object.
// pResult      - receives a newly allocated BSTR holding the CP_ACP
//                conversion of the string; free it with SysFreeString().
//
// Returns TRUE on success.  On failure returns FALSE with a Python
// exception set and *pResult set to NULL.
BOOL PyString_AsBstr(PyObject *stringObject, BSTR *pResult)
{
	*pResult = NULL;

	const char *buf = PyString_AsString(stringObject);
	if (buf==NULL) return FALSE;
	int size = PyString_Size(stringObject);

	if (size==0) {
		/* MultiByteToWideChar rejects zero-length input; build the empty BSTR directly */
		*pResult = SysAllocStringLen(NULL, 0);
	} else {
		/* ask for the exact number of wide characters needed */
		int wideSize = MultiByteToWideChar(CP_ACP, 0, buf, size, NULL, 0);
		if (wideSize==0) {
			PyWin_SetAPIError("MultiByteToWideChar");
			return FALSE;
		}

		/* convert straight into the BSTR - no temporary stack buffer needed */
		*pResult = SysAllocStringLen(NULL, wideSize);
		if (*pResult!=NULL) {
			if (MultiByteToWideChar(CP_ACP, 0, buf, size, *pResult, wideSize)==0) {
				SysFreeString(*pResult);
				*pResult = NULL;
				PyWin_SetAPIError("MultiByteToWideChar");
				return FALSE;
			}
			/* the allocation has room for a terminator; make sure it is set */
			(*pResult)[wideSize] = L'\0';
		}
	}

	if (*pResult==NULL) {
		PyErr_SetString(PyExc_MemoryError, "Allocating BSTR");
		return FALSE;
	}
	return TRUE;
}

// Convert a Python object to a BSTR - allow embedded NULLs, None, etc.
//
// stringObject - a Python string, a PyUnicode object, or None.
// pResult      - receives a newly allocated BSTR (or NULL when None is
//                passed and bNoneOK is set); free with PyWinObject_FreeBstr().
// bNoneOK      - when TRUE, None is accepted and produces a NULL BSTR.
// pResultLen   - optional; receives the length of the result in characters.
//
// Returns TRUE on success, otherwise FALSE with a Python exception set.
BOOL PyWinObject_AsBstr(PyObject *stringObject, BSTR *pResult, BOOL bNoneOK /*= FALSE*/,DWORD *pResultLen /*= NULL*/)
{
	if (PyString_Check(stringObject)) {
		if (!PyString_AsBstr(stringObject, pResult))
			return FALSE;
		// A NULL result here can only mean the BSTR allocation failed.
		if (*pResult==NULL) {
			PyErr_SetString(PyExc_MemoryError, "Allocating BSTR");
			return FALSE;
		}
	}
	else if (PyUnicode_Check(stringObject))
	{
		// copy the value, including embedded NULLs
		BSTR v = ((PyUnicode *)stringObject)->m_bstrValue;
		*pResult = SysAllocStringLen(v, SysStringLen(v));
		if (*pResult==NULL) {
			PyErr_SetString(PyExc_MemoryError, "Allocating BSTR");
			return FALSE;
		}
	}
	else if (stringObject == Py_None) {
		if (!bNoneOK) {
			PyErr_SetString(PyExc_TypeError, "None is not a valid string in this context");
			return FALSE;
		}
		// NULL is the legitimate result for None, not an allocation failure.
		*pResult = NULL;
	} else {
		PyErr_SetString(PyExc_TypeError, "The object can not be converted to a Unicode object");
		return FALSE;
	}

	// SysStringLen(NULL) is 0, so the None case reports a zero length.
	if (pResultLen) *pResultLen = SysStringLen(*pResult);
	return TRUE;
}

// Release a BSTR; simply hands the string to SysFreeString().
void PyWinObject_FreeBstr(BSTR str)
{
	::SysFreeString(str);
}

// Convert a WCHAR string to "char *"
//  If len is known, pass it, else -1
// NOTE - string must be freed with PyWinObject_FreeString
//
// input   - the wide string to convert (need not be NUL-terminated when
//           inLen is given).
// inLen   - number of wide characters to convert, or -1 to use wcslen().
// pResult - receives a malloc'ed, NUL-terminated CP_ACP string.
//
// Returns TRUE on success, otherwise FALSE with a Python exception set.
BOOL PyWin_WCHAR_AsString(WCHAR *input, DWORD inLen, char **pResult)
{
	if (inLen==(DWORD)-1)
		inLen = input ? (DWORD)wcslen(input) : 0;

	// WideCharToMultiByte treats a zero-length input as an error, so an
	// empty string is handled without calling it at all.
	DWORD len = 0;
	if (inLen!=0) {
		// Ask for the real output size: one wide character may need more
		// than one byte, so inLen bytes is not always enough.
		len = WideCharToMultiByte(CP_ACP, 0, input, inLen, NULL, 0, NULL, NULL);
		if (len==0) {
			PyWin_SetAPIError("WideCharToMultiByte");
			return FALSE;
		}
	}

	char *buf = (char *)malloc(len+1);
	if (buf==NULL) {
		PyErr_SetString(PyExc_MemoryError, "Allocating string");
		return FALSE;
	}

	if (len!=0 && WideCharToMultiByte(CP_ACP, 0, input, inLen, buf, len, NULL, NULL)==0) {
		free(buf);
		PyWin_SetAPIError("WideCharToMultiByte");
		return FALSE;
	}
	buf[len]='\0';
	*pResult = buf;
	return TRUE;
}

// Convert a BSTR to a "char *" by forwarding to PyWin_WCHAR_AsString with
// the BSTR's own character count, so the whole BSTR is converted.
BOOL PyWin_Bstr_AsString(BSTR input, char **pResult)
{
	return PyWin_WCHAR_AsString(input, SysStringLen(input), pResult);
}

// Convert a Python object to a "char *" - allow embedded NULLs, None, etc.
//
// stringObject - a Python string, a PyUnicode object, or None.
// pResult      - receives a malloc'ed, NUL-terminated string (or NULL when
//                None is passed and bNoneOK is set); free it with
//                PyWinObject_FreeString().
// bNoneOK      - when TRUE, None is accepted and produces a NULL result.
// pResultLen   - optional; receives the number of bytes in the result, not
//                counting the terminator (0 for None).
//
// Returns TRUE on success, otherwise FALSE with a Python exception set.
BOOL PyWinObject_AsString(PyObject *stringObject, char **pResult, BOOL bNoneOK /*= FALSE*/, DWORD *pResultLen /* = NULL */)
{
	DWORD resultLen = 0;
	if (PyString_Check(stringObject)) {
		int strLen = PyString_Size(stringObject);
		char *copy = (char *)malloc((strLen + 1) * sizeof(char));
		if (copy==NULL) {
			PyErr_SetString(PyExc_MemoryError, "copying string");
			return FALSE;
		}
		memcpy(copy, PyString_AsString(stringObject), strLen);
		copy[strLen] = '\0';
		*pResult = copy;
		resultLen = strLen;
	} else if (PyUnicode_Check(stringObject)) {
		BSTR wide = ((PyUnicode *)stringObject)->m_bstrValue;
		UINT wideLen = SysStringLen(wide);
		if (wideLen==0) {
			// WideCharToMultiByte rejects zero-length input, so an empty
			// (or NULL) BSTR is converted by hand.
			char *empty = (char *)malloc(1);
			if (empty==NULL) {
				PyErr_SetString(PyExc_MemoryError, "Allocating string");
				return FALSE;
			}
			empty[0] = '\0';
			*pResult = empty;
		} else {
			if (!PyWin_Bstr_AsString(wide, pResult))
				return FALSE;
			// Report the length in bytes of the converted string; this can
			// differ from the wide character count for multi-byte code pages.
			if (pResultLen)
				resultLen = WideCharToMultiByte(CP_ACP, 0, wide, wideLen, NULL, 0, NULL, NULL);
		}
	} else if (stringObject == Py_None) {
		if (!bNoneOK) {
			PyErr_SetString(PyExc_TypeError, "None is not a valid string in this context");
			return FALSE;
		}
		*pResult = NULL;
	} else {
		PyErr_SetString(PyExc_TypeError, "The object can not be converted to a string object");
		return FALSE;
	}
	if (pResultLen) *pResultLen = resultLen;
	return TRUE;
}

// Release a string with free(); the "char *" conversions in this file
// allocate their results with malloc().
void PyWinObject_FreeString(char *str)
{
	void *block = str;
	free(block);
}



// PyWinObject_FromBstr - wrap a BSTR in a new PyUnicode object.
//
// ONLY USE THIS FOR TRUE BSTR's - Use the OLECHAR functions below for OLECHAR *'s.
// NOTE - does not use standard macros, so NULLs get through!
//
// A NULL BSTR produces None.  takeOwnership is passed on to the PyUnicode
// constructor: when TRUE the new object adopts the BSTR instead of copying it.
PyObject *PyWinObject_FromBstr(const BSTR bstr, BOOL takeOwnership/*=FALSE*/)
{
	if (bstr!=NULL)
		return new PyUnicode(bstr, takeOwnership);

	Py_INCREF(Py_None);
	return Py_None;
}

// Size info is available (eg, a fn returns a string and also fills in a size variable)
// Creates a PyUnicode from the first numChars characters of str; a NULL str gives None.
PyObject *PyWinObject_FromOLECHAR(const OLECHAR * str, int numChars)
{
	if (str!=NULL)
		return new PyUnicode(str, numChars);

	Py_INCREF(Py_None);
	return Py_None;
}

// No size info avail.
// Creates a PyUnicode from a NUL-terminated wide string; a NULL str gives None.
PyObject *PyWinObject_FromOLECHAR(const OLECHAR * str)
{
	if (str!=NULL)
		return new PyUnicode(str);

	Py_INCREF(Py_None);
	return Py_None;
}


// @object PyUnicode|A Python object, representing a Unicode string.
// @comm A PyUnicode object is used primarily when exchanging string
// information across a COM interface.


// Sequence protocol table for PyUnicode objects.  Each filled slot points at
// the static PyUnicode wrapper that forwards to the matching member function.
// The two assignment slots are left empty, so this table provides no item or
// slice assignment.
static PySequenceMethods PyUnicode_SequenceMethods = {
	(inquiry)PyUnicode::lengthFunc,			/*sq_length*/
	(binaryfunc)PyUnicode::concatFunc,		/*sq_concat*/
	(intargfunc)PyUnicode::repeatFunc,		/*sq_repeat*/
	(intargfunc)PyUnicode::itemFunc,		/*sq_item*/
	(intintargfunc)PyUnicode::sliceFunc,	/*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
};

// Python type object for PyUnicode.  Every PyUnicode constructor stores the
// address of this object in ob_type.  The slots point at the static wrapper
// functions defined at the end of this file.
PYWINTYPES_EXPORT PyTypeObject PyUnicodeType =
{
	PyObject_HEAD_INIT(&PyType_Type)
	0,							/* ob_size */
	"PyUnicode",				/* tp_name */
	sizeof(PyUnicode),			/* tp_basicsize */
	0,							/* tp_itemsize */
	PyUnicode::deallocFunc,		/* tp_dealloc */
	// @pymeth __print__|Used when the object is printed.
	PyUnicode::printFunc,		/* tp_print */
	PyUnicode::getattrFunc,		/* tp_getattr */
	0,						/* tp_setattr */
	// @pymeth __cmp__|Used when Unicode objects are compared.
	PyUnicode::compareFunc,	/* tp_compare */
	// @pymeth __repr__|Used when repr(object) is used.
	PyUnicode::reprFunc,	/* tp_repr */
	0,						/* tp_as_number */
	&PyUnicode_SequenceMethods,	/* tp_as_sequence */
	0,						/* tp_as_mapping */
	// @pymeth __hash__|Used when the hash value of an object is required
	PyUnicode::hashFunc,		/* tp_hash */
	0,						/* tp_call */
	// @pymeth __str__|Used when an (8-bit) string representation is required
	PyUnicode::strFunc,		/* tp_str */
};

// Default constructor: an object with no BSTR at all (m_bstrValue is NULL).
PyUnicode::PyUnicode(void)
{
	this->m_bstrValue = NULL;

	this->ob_type = &PyUnicodeType;
	_Py_NewReference(this);
}

// Construct from a NUL-terminated 8-bit string, converting it to wide
// characters with the CP_ACP code page.
//
// If value is NULL, or the conversion or the BSTR allocation fails, the
// object is left with a NULL m_bstrValue.
PyUnicode::PyUnicode(const char *value)
{
	ob_type = &PyUnicodeType;
	_Py_NewReference(this);
	m_bstrValue = NULL;

	if ( value == NULL )
		return;

	/* use MultiByteToWideChar() as a "good" strlen() */
	/* NOTE: this will include the null-term in the length */
	int cchWideChar = MultiByteToWideChar(CP_ACP, 0, value, -1, NULL, 0);
	if ( cchWideChar <= 0 )
		return;		/* conversion failed; cchWideChar-1 would be a bogus length */

	/* don't count the null-term in the BSTR length; SysAllocStringLen
	   allocates room for one extra terminating character itself */
	BSTR converted = SysAllocStringLen(NULL, cchWideChar - 1);
	if ( converted == NULL )
		return;

	/* convert directly into the BSTR (terminator included), so no
	   temporary stack buffer is required */
	if ( MultiByteToWideChar(CP_ACP, 0, value, -1, converted, cchWideChar) == 0 )
	{
		SysFreeString(converted);
		return;
	}
	m_bstrValue = converted;
}

// Construct from a buffer of numBytes bytes.  The bytes are handed to
// SysAllocStringByteLen() as-is; no code page conversion is performed here.
PyUnicode::PyUnicode(const char *value, unsigned int numBytes)
{
	this->ob_type = &PyUnicodeType;
	_Py_NewReference(this);

	BSTR rawCopy = SysAllocStringByteLen(value, numBytes);
	this->m_bstrValue = rawCopy;
}

// Construct from a NUL-terminated wide string, which is copied with
// SysAllocString().
PyUnicode::PyUnicode(const OLECHAR *value)
{
	this->ob_type = &PyUnicodeType;
	_Py_NewReference(this);

	BSTR copied = SysAllocString(value);
	this->m_bstrValue = copied;
}

// Construct from the first numChars characters of a wide string, copied
// with SysAllocStringLen().
PyUnicode::PyUnicode(const OLECHAR *value, int numChars)
{
	this->ob_type = &PyUnicodeType;
	_Py_NewReference(this);

	BSTR copied = SysAllocStringLen(value, numChars);
	this->m_bstrValue = copied;
}

// Construct from a BSTR.  With takeOwnership set the object adopts the
// caller's BSTR (the destructor frees it); otherwise it stores a full-length
// copy, embedded NULLs included.
PyUnicode::PyUnicode(const BSTR value, BOOL takeOwnership /* = FALSE */)
{
	ob_type = &PyUnicodeType;
	_Py_NewReference(this);

	m_bstrValue = takeOwnership
		? value
		: SysAllocStringLen(value, SysStringLen(value));
}

// Construct from an arbitrary Python object using PyWinObject_AsBstr().
// The conversion result is deliberately ignored: if it fails the object
// simply keeps a NULL m_bstrValue.
PyUnicode::PyUnicode(PyObject *value)
{
	this->ob_type = &PyUnicodeType;
	_Py_NewReference(this);

	this->m_bstrValue = NULL;
	(void)PyWinObject_AsBstr(value, &this->m_bstrValue);
}

// Destructor: releases the BSTR held by this object.
PyUnicode::~PyUnicode(void)
{
	::SysFreeString(this->m_bstrValue);
}

// Three-way comparison against another PyUnicode object.
//
// ob is cast to PyUnicode without a type check, so the caller must pass a
// PyUnicode.  Strings are compared character by character; when one string
// is a prefix of the other, the shorter one sorts first.  NULL and empty
// BSTRs compare equal.
//
// Returns -1, 0 or 1.
int PyUnicode::compare(PyObject *ob)
{
	BSTR other = ((PyUnicode *)ob)->m_bstrValue;
	UINT l1 = SysStringLen(m_bstrValue);
	UINT l2 = SysStringLen(other);
	UINT common = ( l1 < l2 ) ? l1 : l2;

	/* compare the characters both strings have (embedded NULLs included) */
	for ( UINT i = 0; i < common; i++ )
	{
		if ( m_bstrValue[i] != other[i] )
			return ( m_bstrValue[i] < other[i] ) ? -1 : 1;
	}

	/* the common prefix is identical: the lengths decide */
	if ( l1 == l2 )
		return 0;
	return ( l1 < l2 ) ? -1 : 1;
}

// Concatenate this string with another PyUnicode object.
//
// Returns a new PyUnicode, or NULL with TypeError set when ob is not a
// PyUnicode, or NULL with MemoryError set when the result cannot be allocated.
PyObject * PyUnicode::concat(PyObject *ob)
{
	if ( !PyUnicode_Check(ob) ) {
		PyErr_SetString(PyExc_TypeError, "illegal argument type for PyUnicode concatenation");
		return NULL;
	}

	BSTR s2 = ((PyUnicode *)ob)->m_bstrValue;
	UINT l1 = SysStringLen(m_bstrValue);
	UINT l2 = SysStringLen(s2);
	BSTR bres = SysAllocStringLen(NULL, l1 + l2);
	if ( bres == NULL ) {
		PyErr_SetString(PyExc_MemoryError, "Allocating BSTR");
		return NULL;
	}

	/* a zero length also covers a NULL BSTR, so NULL is never copied from */
	if ( l1 )
		memcpy(bres, m_bstrValue, l1 * sizeof(*bres));
	if ( l2 )
		memcpy(&bres[l1], s2, l2 * sizeof(*bres));
	bres[l1+l2] = L'\0';

	/* the new object adopts bres */
	return new PyUnicode(bres, TRUE);
}

// Return a new PyUnicode containing this string repeated count times.
//
// A negative count is treated as zero.  Returns NULL with OverflowError set
// when the result length would overflow, or with MemoryError set when the
// result cannot be allocated.
PyObject * PyUnicode::repeat(int count)
{
	UINT l = SysStringLen(m_bstrValue);
	if ( l == 0 )
		return new PyUnicode();

	/* a negative count would otherwise make the copy loop run away */
	if ( count < 0 )
		count = 0;

	/* keep the total byte size within the range of a signed int */
	const UINT maxChars = 0x7FFFFFFF / sizeof(OLECHAR);
	if ( (UINT)count > maxChars / l ) {
		PyErr_SetString(PyExc_OverflowError, "repeated string is too long");
		return NULL;
	}

	UINT total = l * (UINT)count;
	BSTR bres = SysAllocStringLen(NULL, total);
	if ( bres == NULL ) {
		PyErr_SetString(PyExc_MemoryError, "Allocating BSTR");
		return NULL;
	}

	OLECHAR *p = bres;
	for ( int i = 0; i < count; i++, p += l )
		memcpy(p, m_bstrValue, l * sizeof(*p));
	bres[total] = L'\0';

	/* the new object adopts bres */
	return new PyUnicode(bres, TRUE);
}

// Return the character at index as a new one-character PyUnicode.
//
// Returns NULL with IndexError set when index is negative or not less than
// the string length.
PyObject * PyUnicode::item(int index)
{
	int l = SysStringLen(m_bstrValue);
	if ( index < 0 || index >= l )
	{
		PyErr_SetString(PyExc_IndexError, "unicode index out of range");
		return NULL;
	}

	/* Pass an explicit length so an embedded NULL character still yields a
	   one-character string.  The pointer is declared const OLECHAR * so the
	   (const OLECHAR *, int) constructor is chosen, not the BSTR/ownership one. */
	const OLECHAR *ch = &m_bstrValue[index];
	return new PyUnicode(ch, 1);
}

// Return the characters in [start, end) as a PyUnicode.
//
// Negative bounds are clamped to 0 and end is clamped to the string length.
// A slice covering the whole string returns this object with an extra
// reference; an empty range returns a default-constructed PyUnicode.
PyObject * PyUnicode::slice(int start, int end)
{
	const int length = SysStringLen(m_bstrValue);

	if ( start < 0 )
		start = 0;
	if ( end < 0 )
		end = 0;
	else if ( end > length )
		end = length;

	const bool wholeString = ( start == 0 && end == length );
	if ( wholeString )
	{
		Py_INCREF(this);
		return this;
	}

	const int sliceLen = end - start;
	if ( sliceLen <= 0 )
		return new PyUnicode();

	/* copy the requested range; the new object adopts the copy */
	BSTR bres = SysAllocStringLen(m_bstrValue + start, sliceLen);
	return new PyUnicode(bres, TRUE);
}

// Hash the raw bytes of the BSTR.
//
// NULL and empty strings both hash to 0; they compare equal, so they must
// hash alike.  The result is never -1: that value is remapped to -2 below.
long PyUnicode::hash(void)
{
	/* snarfed almost exactly from stringobject.c */

	int orig_len = SysStringByteLen(m_bstrValue);

	/* nothing to read for a NULL or empty BSTR (and NULL must not be
	   dereferenced); 0 is what the loop below produces for an empty string */
	if ( orig_len == 0 )
		return 0;

	const unsigned char *p = (const unsigned char *)m_bstrValue;

	/* unsigned arithmetic gives the same bit pattern as the original signed
	   computation, without relying on signed overflow */
	unsigned long x = (unsigned long)*p << 7;
	for ( int len = orig_len; --len >= 0; )
		x = (1000003UL*x) ^ *p++;
	x ^= (unsigned long)orig_len;

	long result = (long)x;
	if (result == -1)
		result = -2;
	return result;
}

// Convert the value to a Python (8-bit) string using the CP_ACP code page.
//
// A NULL or empty BSTR gives the empty string.  Returns NULL with a Python
// exception set if the conversion or the string allocation fails.
PyObject * PyUnicode::asStr(void)
{
	if ( m_bstrValue == NULL )
		return PyString_FromString("");

	/*
	** NOTE: we always provide lengths to avoid computing null-term and
	** and to carry through any NULL values.
	*/

	/* how many chars (including nulls) are in the BSTR? */
	int cchWideChar = SysStringLen(m_bstrValue);

	/* WideCharToMultiByte rejects zero-length input */
	if ( cchWideChar == 0 )
		return PyString_FromString("");

	/* get the output length */
	int cchMultiByte = WideCharToMultiByte(CP_ACP, 0, m_bstrValue, cchWideChar,
										   NULL, 0, NULL, NULL);
	if ( cchMultiByte == 0 )
	{
		PyWin_SetAPIError("WideCharToMultiByte");
		return NULL;
	}

	/* allocate the result first and convert straight into its buffer, so no
	   temporary stack buffer of unbounded size is needed */
	PyObject *result = PyString_FromStringAndSize(NULL, cchMultiByte);
	if ( result == NULL )
		return NULL;

	/* do the conversion */
	if ( WideCharToMultiByte(CP_ACP, 0, m_bstrValue, cchWideChar,
							 PyString_AsString(result), cchMultiByte, NULL, NULL) == 0 )
	{
		Py_DECREF(result);
		PyWin_SetAPIError("WideCharToMultiByte");
		return NULL;
	}

	return result;
}

// Print a short description of the object to fp.
//
// The text is "<PyUnicode: '...'>" with the value converted via CP_ACP and
// cut to 40 bytes (followed by "...") when longer, or "<PyUnicode: NULL>"
// when there is no BSTR.
//
// Returns the result of PyObject_Print(), or -1 if the temporary string
// object cannot be created.
int PyUnicode::print(FILE *fp, int flags)
{
	LPSTR s = NULL;
	if ( m_bstrValue )
	{
		/* NOTE: BSTR values are *always* null-termed */
		int numBytes = WideCharToMultiByte(CP_ACP, 0, m_bstrValue, -1, NULL, 0, NULL, NULL);
		s = (LPSTR)alloca(numBytes+1);

		/* if the conversion fails nothing is written to s; terminate it so
		   the strlen() below never reads uninitialized memory */
		if ( numBytes == 0 ||
			 WideCharToMultiByte(CP_ACP, 0, m_bstrValue, -1, s, numBytes, NULL, NULL) == 0 )
			s[0] = '\0';
	}

	/* 80 bytes is enough: at most 40 bytes of value plus the fixed text */
	char resBuf[80];

	if ( s == NULL )
		strcpy(resBuf, "<PyUnicode: NULL>");
	else if ( strlen(s) > 40 )
	{
		s[40] = '\0';
		wsprintf(resBuf, "<PyUnicode: '%s'...>", s);
	}
	else
		wsprintf(resBuf, "<PyUnicode: '%s'>", s);

	//
	// ### ACK! Python uses a non-debug runtime. We can't use stream
	// ### functions when in DEBUG mode!!  (we link against a different
	// ### runtime library)  Hack it by getting Python to do the print!
	//
	// ### - Double Ack - Always use the hack!
	PyObject *ob = PyString_FromString(resBuf);
	if ( ob == NULL )
		return -1;
	int rc = PyObject_Print(ob, fp, flags|Py_PRINT_RAW);
	Py_DECREF(ob);
	return rc;
}

// Build the repr() of the object: the repr() of its 8-bit conversion with an
// 'L' in front.
//
// Returns a new Python string, or NULL with an exception set on failure.
PyObject *PyUnicode::repr()
{
	// This is not quite correct, but good enough for now.
	// To save me lots of work, I convert the Unicode to a temporary
	// string object, then perform a repr on the string object, then
	// simply prefix with an 'L' to indicate the string is Unicode.
	PyObject *obStr = asStr();
	if (obStr==NULL)
		return NULL;
	PyObject *obRepr = PyObject_Repr(obStr);
	Py_DECREF(obStr);
	if (obRepr==NULL)
		return NULL;

	int strSize = PyString_Size(obRepr);

	// Allocate the result with room for the leading 'L' and fill it in place.
	PyObject *result = PyString_FromStringAndSize(NULL, strSize+1);
	if (result!=NULL) {
		char *buffer = PyString_AsString(result);
		buffer[0] = 'L';
		memcpy(buffer+1, PyString_AsString(obRepr), strSize);
	}

	// obRepr owns the characters copied above, so it is only released once
	// the copy has been made.
	Py_DECREF(obRepr);
	return result;
}

// Return a new PyUnicode holding an upper-cased copy of the value.
PyObject * PyUnicode::upper(void)
{
	/* SysAllocString stops at the first NULL, which is fine here because
	   _wcsupr stops there as well */
	BSTR upperCopy = SysAllocString(m_bstrValue);

	/* convert the copy in place (nothing to do if there is no copy) */
	if ( upperCopy != NULL )
		_wcsupr(upperCopy);

	/* the new object adopts the copy */
	return new PyUnicode(upperCopy, /* takeOwnership= */ TRUE);
}

// Return a new PyUnicode holding a lower-cased copy of the value.
PyObject * PyUnicode::lower(void)
{
	/* SysAllocString stops at the first NULL, which is fine here because
	   _wcslwr stops there as well */
	BSTR lowerCopy = SysAllocString(m_bstrValue);

	/* convert the copy in place (nothing to do if there is no copy) */
	if ( lowerCopy != NULL )
		_wcslwr(lowerCopy);

	/* the new object adopts the copy */
	return new PyUnicode(lowerCopy, /* takeOwnership= */ TRUE);
}

// Method table for PyUnicode objects.  PyUnicode::getattr() passes this
// table to Py_FindMethod() for any attribute name other than "raw".
static struct PyMethodDef PyUnicode_methods[] = {
	{ "upper",	PyUnicode::upperFunc,	METH_VARARGS },
	{ "lower",	PyUnicode::lowerFunc,	METH_VARARGS },
	{ NULL,		NULL }		/* sentinel */
};

// Attribute lookup.
//
// "raw" returns the bytes of the BSTR as a Python string (the empty string
// when there is no BSTR).  Any other name is looked up in PyUnicode_methods.
PyObject * PyUnicode::getattr(char *name)
{
	const bool wantsRaw = ( strcmp(name, "raw") == 0 );
	if ( !wantsRaw )
		return Py_FindMethod(PyUnicode_methods, this, name);

	if ( m_bstrValue == NULL )
		return PyString_FromString("");

	int byteCount = SysStringByteLen(m_bstrValue);
	char *rawBytes = (char *)(void *)m_bstrValue;
	return PyString_FromStringAndSize(rawBytes, byteCount);
}

// tp_dealloc slot: destroys the C++ object behind the Python object.
/*static*/ void PyUnicode::deallocFunc(PyObject *ob)
{
	PyUnicode *self = (PyUnicode *)ob;
	delete self;
}

// @pymethod int|PyUnicode|__cmp__|Used when objects are compared.
// tp_compare slot: forwards to compare() on the first object.
int PyUnicode::compareFunc(PyObject *ob1, PyObject *ob2)
{
	PyUnicode *self = (PyUnicode *)ob1;
	return self->compare(ob2);
}

// @pymethod int|PyUnicode|__hash__|Used when the hash value of a Unicode object is required
// tp_hash slot: forwards to hash().
long PyUnicode::hashFunc(PyObject *ob)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->hash();
}

// @pymethod |PyUnicode|__str__|Used when a (8-bit) string representation of the Unicode object is required.
// tp_str slot: forwards to asStr().
PyObject * PyUnicode::strFunc(PyObject *ob)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->asStr();
}

// @pymethod |PyUnicode|__print__|Used when the Unicode object is printed.
// tp_print slot: forwards to print().
int PyUnicode::printFunc(PyObject *ob, FILE *fp, int flags)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->print(fp, flags);
}

// @pymethod |PyUnicode|__repr__|Used when repr(object) is used.
// tp_repr slot: forwards to repr().
PyObject *PyUnicode::reprFunc(PyObject *ob)
{
	// @comm Note the format is L'string', where the string portion is
	// the repr() of the 8-bit conversion of the value.
	PyUnicode *self = (PyUnicode *)ob;
	return self->repr();
}

// @pymethod |PyUnicode|__getattr__|Used to access attributes of the Unicode object.
// tp_getattr slot: forwards to getattr().
PyObject * PyUnicode::getattrFunc(PyObject *ob, char *name)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->getattr(name);
}

// sq_length slot: the length of the BSTR in characters.
int PyUnicode::lengthFunc(PyObject *ob)
{
	PyUnicode *self = (PyUnicode *)ob;
	return SysStringLen(self->m_bstrValue);
}

// sq_concat slot: forwards to concat() on the first object.
PyObject * PyUnicode::concatFunc(PyObject *ob1, PyObject *ob2)
{
	PyUnicode *self = (PyUnicode *)ob1;
	return self->concat(ob2);
}

// sq_repeat slot: forwards to repeat().
PyObject * PyUnicode::repeatFunc(PyObject *ob, int count)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->repeat(count);
}

// sq_item slot: forwards to item().
PyObject * PyUnicode::itemFunc(PyObject *ob, int index)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->item(index);
}

// sq_slice slot: forwards to slice().
PyObject * PyUnicode::sliceFunc(PyObject *ob, int start, int end)
{
	PyUnicode *self = (PyUnicode *)ob;
	return self->slice(start, end);
}

// "upper" method: takes no arguments and forwards to upper().
PyObject * PyUnicode::upperFunc(PyObject *ob, PyObject *args)
{
	if ( PyArg_ParseTuple(args, ":upper") )
		return ((PyUnicode *)ob)->upper();

	/* argument parsing failed; the exception is already set */
	return NULL;
}

// "lower" method: takes no arguments and forwards to lower().
PyObject * PyUnicode::lowerFunc(PyObject *ob, PyObject *args)
{
	if ( PyArg_ParseTuple(args, ":lower") )
		return ((PyUnicode *)ob)->lower();

	/* argument parsing failed; the exception is already set */
	return NULL;
}
