summaryrefslogtreecommitdiff
path: root/source/UnicodeInlines.incl_cpp
blob: 829132a5a18faacf6d16182c1741ecde0a7f3c5c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#ifndef __UnicodeInlines_incl_cpp__
#define __UnicodeInlines_incl_cpp__

// =================================================================================================
// Copyright 2004 Adobe Systems Incorporated
// All Rights Reserved.
//
// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
// of the Adobe license agreement accompanying it.
// =================================================================================================

#include "source/UnicodeConversions.hpp"

// =================================================================================================
// Inner loop utilities that need to be inlined.
// =================================================================================================

// Decode the UTF-8 sequence at *utf8Str_io and step the caller's pointer past it.
//
//   utf8Str_io : in/out cursor into the UTF-8 text; on return it has been advanced by the number of
//                bytes that CodePoint_from_UTF8 reported as consumed.
//   returns    : the decoded Unicode code point.
//
// CodePoint_from_UTF8 throws an exception for errors. The available length is always passed as 4,
// not as the real number of bytes left in the caller's buffer.
static inline XMP_Uns32 GetCodePoint ( const XMP_Uns8 ** utf8Str_io )
{
	XMP_Uns32 codePoint;
	size_t unitsUsed;

	CodePoint_from_UTF8 ( *utf8Str_io, 4, &codePoint, &unitsUsed );	// Throws an exception for errors.
	*utf8Str_io += unitsUsed;

	return codePoint;
}

// =================================================================================================

// Tell whether cp is an ASCII character that may begin an XML name here: a Latin letter of either
// case or an underscore. Every other value, including ':', yields false.
static inline bool IsStartChar_ASCII ( XMP_Uns32 cp )
{
	const bool isUpper = (cp >= 'A') && (cp <= 'Z');
	const bool isLower = (cp >= 'a') && (cp <= 'z');
	return ( isLower || isUpper || (cp == '_') );
}

// -------------------------------------------------------------------------------------------------

// Tell whether cp falls in one of the non-ASCII ranges accepted as the starting character of an
// XML name. Returns true when cp lies inside any listed range (bounds inclusive), false otherwise.
static inline bool IsStartChar_NonASCII ( XMP_Uns32 cp )
{
	// Inclusive { first, last } pairs, kept in ascending order so the scan can stop early.
	static const XMP_Uns32 kStartRanges[][2] = {
		{ 0xC0,    0xD6    },
		{ 0xD8,    0xF6    },
		{ 0xF8,    0x2FF   },
		{ 0x370,   0x37D   },
		{ 0x37F,   0x1FFF  },
		{ 0x200C,  0x200D  },
		{ 0x2070,  0x218F  },
		{ 0x2C00,  0x2FEF  },
		{ 0x3001,  0xD7FF  },
		{ 0xF900,  0xFDCF  },
		{ 0xFDF0,  0xFFFD  },
		{ 0x10000, 0xEFFFF }
	};
	const size_t rangeCount = sizeof ( kStartRanges ) / sizeof ( kStartRanges[0] );

	for ( size_t i = 0; i < rangeCount; ++i ) {
		if ( cp < kStartRanges[i][0] ) return false;	// Below this range, so in a gap or before the first range.
		if ( cp <= kStartRanges[i][1] ) return true;
	}

	return false;	// Above the last range.

}

// -------------------------------------------------------------------------------------------------

// Tell whether cp is one of the ASCII characters allowed in an XML name only after the first
// position: a decimal digit, a hyphen, or a period.
static inline bool IsOtherChar_ASCII ( XMP_Uns32 cp )
{
	if ( (cp == '.') || (cp == '-') ) return true;
	return ( (cp >= '0') && (cp <= '9') );
}

// -------------------------------------------------------------------------------------------------

// Tell whether cp is one of the non-ASCII characters allowed in an XML name only after the first
// position: U+00B7, U+0300..U+036F, or U+203F..U+2040 (bounds inclusive).
static inline bool IsOtherChar_NonASCII ( XMP_Uns32 cp )
{
	if ( cp == 0xB7 ) return true;
	if ( (cp >= 0x300) && (cp <= 0x36F) ) return true;
	if ( (cp >= 0x203F) && (cp <= 0x2040) ) return true;
	return false;
}

// -------------------------------------------------------------------------------------------------

// Walk a nul-terminated string and check that every non-ASCII sequence decodes as UTF-8.
//
//   str : the text to check; it is read up to, but not including, the first zero byte.
//
// Bytes below 0x80 are stepped over one at a time. Any other byte is handed to GetCodePoint, which
// throws for bad UTF-8 and otherwise moves the cursor past the decoded sequence. Returns normally
// when the terminating zero byte is reached.
static inline void VerifyUTF8 ( XMP_StringPtr str )
{
	const XMP_Uns8 * cursor = (const XMP_Uns8 *) str;

	for ( ; ; ) {
		const XMP_Uns8 unit = *cursor;
		if ( unit == 0 ) break;
		if ( unit < 0x80 ) {
			++cursor;	// Plain ASCII, nothing to decode.
		} else {
			(void) GetCodePoint ( &cursor );	// Throws for bad UTF-8.
		}
	}
}

// -------------------------------------------------------------------------------------------------

// Verify that the bytes in [_nameStart, _nameEnd) form a simple XML name.
//
//   _nameStart : first byte of the candidate name (UTF-8).
//   _nameEnd   : one past the last byte of the candidate name.
//
// The first character must satisfy IsStartChar_ASCII or IsStartChar_NonASCII. Each later character
// must be a start character or satisfy IsOtherChar_ASCII / IsOtherChar_NonASCII. ':' is not accepted
// anywhere, so a qualified name has to be checked one part at a time.
//
// Throws kXMPErr_BadXPath with "Empty XML name" when the range is empty, and with "Bad XML name" for
// a disallowed character or for a multi-byte sequence that runs past _nameEnd. GetCodePoint throws
// for bad UTF-8. Returns normally when the whole range is a valid name.
static inline void VerifySimpleXMLName ( XMP_StringPtr _nameStart, XMP_StringPtr _nameEnd )
{

	const XMP_Uns8 * nameStart = (const XMP_Uns8 *) _nameStart;
	const XMP_Uns8 * nameEnd   = (const XMP_Uns8 *) _nameEnd;
	const XMP_Uns8 * namePos   = nameStart;
	XMP_Uns32 cp;
	
	// The first character is more restricted.
	
	if ( nameStart >= nameEnd ) XMP_Throw ( "Empty XML name", kXMPErr_BadXPath );

	cp = *namePos;
	if ( cp < 0x80 ) {
		++namePos;
		if ( ! IsStartChar_ASCII(cp) ) goto NameError;
	} else {
		cp = GetCodePoint ( &namePos );
		// GetCodePoint does not know about nameEnd; reject a sequence that straddles the end of the
		// name instead of accepting bytes that lie outside the range being verified.
		if ( namePos > nameEnd ) goto NameError;
		if ( ! IsStartChar_NonASCII(cp) ) goto NameError;
	}

	// Check the rest of the name.
	
	while ( namePos < nameEnd ) {
		cp = *namePos;
		if ( cp < 0x80 ) {
			++namePos;
			if ( (! IsStartChar_ASCII(cp)) && (! IsOtherChar_ASCII(cp)) ) goto NameError;
		} else {
			cp = GetCodePoint ( &namePos );
			if ( namePos > nameEnd ) goto NameError;	// Sequence extends beyond the end of the name.
			if ( (! IsStartChar_NonASCII(cp)) && (! IsOtherChar_NonASCII(cp)) ) goto NameError;
		}
	}

	return;

NameError:
	XMP_Throw ( "Bad XML name", kXMPErr_BadXPath );
	
}	// VerifySimpleXMLName

// =================================================================================================

#endif	// __UnicodeInlines_incl_cpp__