/************************************************************************* ** UnicodeTest.cpp ** ** ** ** This file is part of dvisvgm -- a fast DVI to SVG converter ** ** Copyright (C) 2005-2024 Martin Gieseking ** ** ** ** This program is free software; you can redistribute it and/or ** ** modify it under the terms of the GNU General Public License as ** ** published by the Free Software Foundation; either version 3 of ** ** the License, or (at your option) any later version. ** ** ** ** This program is distributed in the hope that it will be useful, but ** ** WITHOUT ANY WARRANTY; without even the implied warranty of ** ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** ** GNU General Public License for more details. ** ** ** ** You should have received a copy of the GNU General Public License ** ** along with this program; if not, see . ** *************************************************************************/ #include #include "Unicode.hpp" using namespace std; TEST(UnicodeTest, isValidCodepoint) { for (uint32_t i=0; i <= 0x20; i++) EXPECT_FALSE(Unicode::isValidCodepoint(i)) << "i=" << i; for (uint32_t i=0x21; i <= 0x7e; i++) EXPECT_TRUE(Unicode::isValidCodepoint(i)) << "i=" << i; EXPECT_FALSE(Unicode::isValidCodepoint(0xfffe)); EXPECT_FALSE(Unicode::isValidCodepoint(0xffff)); EXPECT_TRUE(Unicode::isValidCodepoint(0x10000)); } TEST(UnicodeTest, charToCodepoint) { for (uint32_t i=0; i <= 0x20; i++) EXPECT_EQ(Unicode::charToCodepoint(i), 0xe000+i) << "i=" << i; for (uint32_t i=0x21; i <= 0x7e; i++) EXPECT_EQ(Unicode::charToCodepoint(i), i) << "i=" << i; for (uint32_t i=0x7f; i <= 0x9f; i++) EXPECT_EQ(Unicode::charToCodepoint(i), 0xe021+i-0x7f) << "i=" << i; EXPECT_EQ(Unicode::charToCodepoint(0x10fffd), 0x10fffdu); EXPECT_EQ(Unicode::charToCodepoint(0x10fffe), 0xe887u); EXPECT_EQ(Unicode::charToCodepoint(0x10ffff), 0xe888u); } TEST(UnicodeTest, utf8) { EXPECT_EQ(Unicode::utf8(0x1), string("\x01")); EXPECT_EQ(Unicode::utf8(0x47), string("\x47")); EXPECT_EQ(Unicode::utf8(0x7f), string("\x7f")); EXPECT_EQ(Unicode::utf8(0x80), string("\xc2\x80")); EXPECT_EQ(Unicode::utf8(0x07ff), string("\xdf\xbf")); EXPECT_EQ(Unicode::utf8(0x0800), string("\xe0\xa0\x80")); EXPECT_EQ(Unicode::utf8(0x7fff), string("\xe7\xbf\xbf")); EXPECT_EQ(Unicode::utf8(0xfffe), string("\xef\xbf\xbe")); EXPECT_EQ(Unicode::utf8(0xffff), string("\xef\xbf\xbf")); EXPECT_EQ(Unicode::utf8(0x10000), string("\xf0\x90\x80\x80")); EXPECT_EQ(Unicode::utf8(0x10ffff), string("\xf4\x8f\xbf\xbf")); EXPECT_TRUE(Unicode::utf8(0x110000).empty()); } TEST(UnicodeTest, fromSurrogate1) { EXPECT_EQ(Unicode::fromSurrogate(0xd800dc00), 0x10000u); EXPECT_EQ(Unicode::fromSurrogate(0xd83cdd10), 0x1f110u); // invalid surrogates EXPECT_EQ(Unicode::fromSurrogate(0xd7ffdc00), 0u); EXPECT_EQ(Unicode::fromSurrogate(0xdc00dc00), 0u); EXPECT_EQ(Unicode::fromSurrogate(0xd800dbff), 0u); EXPECT_EQ(Unicode::fromSurrogate(0xd800e000), 0u); } TEST(UnicodeTest, fromSurrogate2) { EXPECT_EQ(Unicode::fromSurrogate(0xd800, 0xdc00), 0x10000u); EXPECT_EQ(Unicode::fromSurrogate(0xd83c, 0xdd10), 0x1f110u); // invalid surrogates EXPECT_EQ(Unicode::fromSurrogate(0xd7ff, 0xdc00), 0u); EXPECT_EQ(Unicode::fromSurrogate(0xdc00, 0xdc00), 0u); EXPECT_EQ(Unicode::fromSurrogate(0xd800, 0xdbff), 0u); EXPECT_EQ(Unicode::fromSurrogate(0xd800, 0xe000), 0u); } TEST(UnicodeTest, toSurrogate) { EXPECT_EQ(Unicode::toSurrogate(0x10000), 0xd800dc00u); EXPECT_EQ(Unicode::toSurrogate(0x1f110), 0xd83cdd10u); // invalid code points EXPECT_EQ(Unicode::toSurrogate(0xffff), 0u); EXPECT_EQ(Unicode::toSurrogate(0x110000), 0u); } TEST(UnicodeTest, aglNameToCodepoint1) { EXPECT_EQ(Unicode::aglNameToCodepoint("does not exist"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("Eogonek"), 0x118); EXPECT_EQ(Unicode::aglNameToCodepoint("alpha"), 0x03b1); EXPECT_EQ(Unicode::aglNameToCodepoint("Alpha"), 0x0391); EXPECT_EQ(Unicode::aglNameToCodepoint("alphatonos"), 0x03ac); EXPECT_EQ(Unicode::aglNameToCodepoint("SF460000"), 0x2568); } TEST(UnicodeTest, aglNameToCodepoint2) { EXPECT_EQ(Unicode::aglNameToCodepoint("uni1234"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("uni1234.suffix"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("uni1234_part2"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("uni12345678"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("uni123"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("uni12345"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("uni1234567"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("uni100000"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("uni123E"), 0x123e); EXPECT_EQ(Unicode::aglNameToCodepoint("uni123e"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("uniD7FF"), 0xd7ff); EXPECT_EQ(Unicode::aglNameToCodepoint("uniD800"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("u1234"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("u1234.suffix"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("u1234_part2"), 0x1234); EXPECT_EQ(Unicode::aglNameToCodepoint("u12345678"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("u123.suffix"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("u123"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("u12345"), 0x12345); EXPECT_EQ(Unicode::aglNameToCodepoint("u1234567"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("u100000"), 0x100000); EXPECT_EQ(Unicode::aglNameToCodepoint("u123E"), 0x123e); EXPECT_EQ(Unicode::aglNameToCodepoint("u123e"), 0); EXPECT_EQ(Unicode::aglNameToCodepoint("uD7FF"), 0xd7ff); EXPECT_EQ(Unicode::aglNameToCodepoint("uD800"), 0); }