208 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			208 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /**
 | |
|  * @file    genpages.c
 | |
|  * @brief   generate required font page files
 | |
|  * @author  Yunhui Fu (yhfudev@gmail.com)
 | |
|  * @version 1.0
 | |
|  * @date    2015-02-19
 | |
|  * @copyright Yunhui Fu (2015)
 | |
|  */
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <stdint.h>    /* uint8_t */
 | |
| #include <stdlib.h>    /* size_t */
 | |
| #include <string.h>
 | |
| #include <assert.h>
 | |
| #include "getline.h"
 | |
| 
 | |
/**
 * @brief Decode the UTF-8 sequence at pstart into a code point
 *
 * @param pstart : pointer to the lead byte of the sequence
 *
 * @return the decoded value; 0 when the lead byte is a continuation byte
 *         (0x80..0xBF) or 0xFE/0xFF
 *
 * The lead byte selects how many trailing bytes are consumed (0 to 5, i.e.
 * the 1- to 6-byte UTF-8 forms). The trailing bytes are NOT validated: the
 * low six bits of each one are merged unconditionally, so the caller has to
 * make sure that many bytes are readable after pstart.
 */
wchar_t get_val_utf82uni(uint8_t *pstart) {
  const uint8_t lead = pstart[0];
  size_t trailing = 0;
  size_t i;
  wchar_t code = 0;

  /* plain ASCII needs no decoding */
  if (lead < 0x80) {
    return lead;
  }

  /* the lead byte carries the sequence length and the top payload bits */
  if (lead >= 0xC0 && lead <= 0xDF) {          /* 110xxxxx */
    trailing = 1;
    code = lead & 0x1F;
  }
  else if (lead >= 0xE0 && lead <= 0xEF) {     /* 1110xxxx */
    trailing = 2;
    code = lead & 0x0F;
  }
  else if (lead >= 0xF0 && lead <= 0xF7) {     /* 11110xxx */
    trailing = 3;
    code = lead & 0x07;
  }
  else if (lead >= 0xF8 && lead <= 0xFB) {     /* 111110xx */
    trailing = 4;
    code = lead & 0x03;
  }
  else if (lead >= 0xFC && lead <= 0xFD) {     /* 1111110x */
    trailing = 5;
    code = lead & 0x01;
  }
  /* anything else is an encoding error: trailing stays 0, code stays 0 */

  /* each trailing byte contributes its low six bits */
  for (i = 1; i <= trailing; i++) {
    code <<= 6;
    code |= pstart[i] & 0x3F;
  }
  return code;
}
 | |
| 
 | |
/**
 * @brief Convert one UTF-8 encoded character to a Unicode code point (wchar_t)
 *
 * @param pstart : pointer to the first byte of the UTF-8 character; must not
 *                 be NULL and must point into a NUL-terminated buffer
 * @param pval : where the decoded code point is stored; may be NULL
 *
 * @return pointer to the first byte after the bytes that were consumed; this
 *         is always at least pstart + 1
 *
 * The 1- to 6-byte UTF-8 forms are accepted. Every trailing byte must have
 * the form 10xxxxxx; decoding stops at the first byte that does not, the
 * code point is reported as 0 and the returned pointer addresses that
 * offending byte so it is examined again as the start of the next character.
 * Because NUL is not a continuation byte, a sequence cut short by the string
 * terminator never makes the decoder read past it.
 *
 * A run of stray continuation bytes, or of 0xFE/0xFF bytes, is skipped as a
 * whole and also yields code point 0.
 */
uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) {
  uint32_t val = 0;
  uint8_t *p = pstart;
  size_t ntrail = 0;

  assert(NULL != pstart);

  if (0x80 == (0xC0 & *p)) {
    /* error: continuation byte without a lead byte, skip the whole run */
    for (; 0x80 == (0xC0 & *p); p++);
  }
  else if (*p >= 0xFE) {
    /* error: 0xFE and 0xFF never appear in UTF-8, skip the whole run */
    for (; *p >= 0xFE; p++);
  }
  else {
    /* the lead byte gives the payload's top bits and the trailing count */
    if (0 == (0x80 & *p)) {
      val = *p;
    }
    else if (0xC0 == (0xE0 & *p)) {
      val = *p & 0x1F;
      ntrail = 1;
    }
    else if (0xE0 == (0xF0 & *p)) {
      val = *p & 0x0F;
      ntrail = 2;
    }
    else if (0xF0 == (0xF8 & *p)) {
      val = *p & 0x07;
      ntrail = 3;
    }
    else if (0xF8 == (0xFC & *p)) {
      val = *p & 0x03;
      ntrail = 4;
    }
    else {
      /* only 1111110x is left at this point */
      val = *p & 0x01;
      ntrail = 5;
    }
    p++;

    for (; ntrail > 0; ntrail--) {
      if (0x80 != (0xC0 & *p)) {
        /* truncated or malformed sequence: leave *p for the next call */
        val = 0;
        break;
      }
      val = (val << 6) | (uint32_t)(*p & 0x3F);
      p++;
    }
  }

  if (pval) *pval = (wchar_t)val;

  return p;
}
 | |
| 
 | |
/**
 * @brief Print the command line help to stderr
 *
 * @param progname : program name shown in the usage line
 */
void usage(char* progname) {
  fprintf(stderr,
          "Usage: %s\n"
          "   read data from stdin\n",
          progname);
}
 | |
| 
 | |
/**
 * @brief Scan a UTF-8 buffer and list every character with code point >= 256
 *
 * @param msg : start of the UTF-8 data; a NULL pointer is ignored
 * @param len : number of bytes of msg to scan
 *
 * For each character whose code point is 256 or above, one line is written
 * to stdout: the page number (code point / 128), the offset inside the page
 * (code point % 128) and the character's own UTF-8 bytes.
 *
 * Scanning ends at msg + len. It also ends early if the decoder returns
 * NULL, fails to advance, or reports a character that would end beyond
 * msg + len; such a truncated trailing character is not printed, so no byte
 * outside the given range is ever written out.
 */
void utf8_parse(const char* msg, unsigned int len) {
  uint8_t *pend;
  uint8_t *p;
  uint8_t *pre;
  wchar_t val;

  if (NULL == msg) return;

  /* get_utf8_value() takes a non-const pointer; the data is only read here */
  pend = (uint8_t *)msg + len;
  for (pre = (uint8_t *)msg; pre < pend; pre = p) {
    val = 0;
    p = get_utf8_value(pre, &val);
    if (NULL == p || p <= pre || p > pend) break;
    if (val >= 256) {
      fprintf(stdout, "%d %d ", (int)(val / 128), (int)(val % 128));
      fwrite(pre, 1, (size_t)(p - pre), stdout);
      fprintf(stdout, "\n");
    }
  }
}
 | |
| 
 | |
/**
 * @brief Read a stream line by line and pass each line to utf8_parse()
 *
 * @param fp : stream to read from
 *
 * @return 0 when the stream was read to its end; -1 if fp is NULL, the line
 *         buffer cannot be allocated, or the stream reports a read error
 *
 * The line buffer starts at 10000 bytes and is handed to getline() by
 * address together with its size, so the final pointer value is the one
 * that gets freed.
 * NOTE(review): getline() comes from the project's "getline.h"; it is
 * assumed to follow the POSIX contract (may reallocate the buffer, returns
 * a value <= 0 at end of input or on error) -- confirm.
 */
int load_file(FILE *fp) {
  char *buffer = NULL;
  size_t szbuf = 10000;
  int ret = 0;

  if (NULL == fp) return -1;

  buffer = malloc(szbuf);
  if (NULL == buffer) return -1;

  while (getline(&buffer, &szbuf, fp) > 0) {
    /* the line length is taken up to the first NUL byte */
    utf8_parse(buffer, (unsigned int)strlen(buffer));
  }

  /* tell a real read failure apart from a normal end of input */
  if (ferror(fp)) ret = -1;

  free(buffer);
  return ret;
}
 | |
| 
 | |
/**
 * @brief Program entry: process the UTF-8 text arriving on stdin
 *
 * @param argc : argument count; any argument besides the program name is
 *               rejected
 * @param argv : argument vector; argv[0] is used in the usage message
 *
 * @return 0 on success, 1 when arguments were given, EXIT_FAILURE when
 *         load_file() reports a failure
 */
int main(int argc, char * argv[]) {
  if (argc > 1) {
    usage(argv[0]);
    return 1;
  }
  if (0 != load_file(stdin)) {
    fprintf(stderr, "error: failed to process input\n");
    return EXIT_FAILURE;
  }
  return 0;
}
 |