/* smiley.c Jeff Ondich, 24 Jan 2022 This program demonstrates how to think about the storage of a codepoint in UTF-8 as a sequence of bytes. */ #include #include "bits.h" #define UTF8_BUFFER_SIZE 5 int main() { char buffer[UTF8_BUFFER_SIZE]; // Put U+1F600 GRINNING FACE, encoded into UTF-8, into the buffer. buffer[0] = 0xF0; buffer[1] = 0x9F; buffer[2] = 0x98; buffer[3] = 0x80; // This null character isn't part of the UTF-8 encoding. // But we put it here (see the "bits and characters" programming // assignment) so we can use printf and %s to print the resulting // character to the terminal. buffer[4] = 0x00; // Print the hex values of the individual bytes in the encoding. printf("The bytes in the UTF-8 encoding:\n"); for (int k = 0; k < UTF8_BUFFER_SIZE - 1; k++) { printf("0x%x\n", (unsigned char)buffer[k]); } printf("\n"); // Print the character itself to output by treating the encoded // character as a null-terminated char array. Note that if your // terminal program doesn't default to UTF-8, you might have to change // its settings to see the right output. printf("Character: %s\n", buffer); // Uncomment if you have from_utf8 available. // Print the codepoint, as computed from your from_utf8 function. //int codepoint = from_utf8(buffer); //printf("Codepoint: 0x%x\n", codepoint); return 0; }