Compare commits

...

4 Commits

Author SHA1 Message Date
Roberto E. Vargas Caballero e8f1308586 Receive only a wchar_t in tputc()
It simplifies the conversion from the UTF-8 input string and
simplifies the checks done in tputc, but it still requires
a lot of additional conversions that will be removed later.
2014-09-26 19:25:07 +02:00
Roberto E. Vargas Caballero d65ebe9aed First step to plain unicode representation
tprinter() is used in tputc, so a first step is to change it
to accept a wchar_t instead of a multibyte string. This patch makes
the code uglier because it needs a lot of conversions, but
they will be removed later.
2014-09-26 19:25:07 +02:00
Roberto E. Vargas Caballero 28f56c8842 Remove last parameter of utf8encode
This parameter was always UTF_SIZ, so it is better to remove it and
use UTF_SIZ directly in it.
2014-09-26 19:25:06 +02:00
Roberto E. Vargas Caballero 23af75fc75 Simplify utf8decodebyte using some locals
These local variables help to make expressions simpler and avoid
using a pointer as the induction variable in a for loop.
2014-09-26 19:25:06 +02:00
1 changed files with 127 additions and 106 deletions

233
st.c
View File

@ -363,7 +363,7 @@ static void strparse(void);
static void strreset(void);
static int tattrset(int);
static void tprinter(char *, size_t);
static void tprinter(wchar_t);
static void tdumpsel(void);
static void tdumpline(int);
static void tdump(void);
@ -379,20 +379,20 @@ static void tmoveato(int, int);
static void tnew(int, int);
static void tnewline(int);
static void tputtab(int);
static void tputc(char *, int);
static void tputc(wchar_t);
static void treset(void);
static void tresize(int, int);
static void tscrollup(int, int);
static void tscrolldown(int, int);
static void tsetattr(int *, int);
static void tsetchar(char *, Glyph *, int, int);
static void tsetchar(wchar_t, Glyph *, int, int);
static void tsetscroll(int, int);
static void tswapscreen(void);
static void tsetdirt(int, int);
static void tsetdirtattr(int);
static void tsetmode(bool, bool, int *, int);
static void tfulldirt(void);
static void techo(char *, int);
static void techo(wchar_t);
static void tcontrolcode(uchar );
static void tdectest(char );
static int32_t tdefcolor(int *, int *, int);
@ -452,12 +452,12 @@ static void selsnap(int, int *, int *, int);
static void getbuttoninfo(XEvent *);
static void mousereport(XEvent *);
static size_t utf8decode(char *, long *, size_t);
static long utf8decodebyte(char, size_t *);
static size_t utf8encode(long, char *, size_t);
static char utf8encodebyte(long, size_t);
static size_t utf8decode(char *, wchar_t *, size_t);
static wchar_t utf8decodebyte(uchar, size_t *);
static size_t utf8encode(wchar_t, char *);
static char utf8encodebyte(wchar_t, size_t);
static size_t utf8len(char *);
static size_t utf8validate(long *, size_t);
static size_t utf8validate(wchar_t *, size_t);
static ssize_t xwrite(int, const char *, size_t);
static void *xmalloc(size_t);
@ -567,9 +567,9 @@ xstrdup(char *s) {
}
size_t
utf8decode(char *c, long *u, size_t clen) {
utf8decode(char *c, wchar_t *u, size_t clen) {
size_t i, j, len, type;
long udecoded;
wchar_t udecoded;
*u = UTF_INVALID;
if(!clen)
@ -589,20 +589,27 @@ utf8decode(char *c, long *u, size_t clen) {
return len;
}
long
utf8decodebyte(char c, size_t *i) {
for(*i = 0; *i < LEN(utfmask); ++(*i))
if(((uchar)c & utfmask[*i]) == utfbyte[*i])
return (uchar)c & ~utfmask[*i];
return 0;
wchar_t
utf8decodebyte(uchar c, size_t *len) {
size_t i;
long ret = 0;
for(i = 0; i < LEN(utfmask); ++i) {
if((c & utfmask[i]) == utfbyte[i]) {
ret = c & ~utfmask[i];
break;
}
}
*len = i;
return ret;
}
size_t
utf8encode(long u, char *c, size_t clen) {
utf8encode(wchar_t u, char *c) {
size_t len, i;
len = utf8validate(&u, 0);
if(clen < len)
if(len > UTF_SIZ)
return 0;
for(i = len - 1; i != 0; --i) {
c[i] = utf8encodebyte(u, 0);
@ -613,17 +620,17 @@ utf8encode(long u, char *c, size_t clen) {
}
char
utf8encodebyte(long u, size_t i) {
utf8encodebyte(wchar_t u, size_t i) {
return utfbyte[i] | (u & ~utfmask[i]);
}
size_t
utf8len(char *c) {
return utf8decode(c, &(long){0}, UTF_SIZ);
return utf8decode(c, &(wchar_t){0}, UTF_SIZ);
}
size_t
utf8validate(long *u, size_t i) {
utf8validate(wchar_t *u, size_t i) {
if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
*u = UTF_INVALID;
for(i = 1; *u > utfmax[i]; ++i)
@ -1244,9 +1251,8 @@ ttyread(void) {
static char buf[BUFSIZ];
static int buflen = 0;
char *ptr;
char s[UTF_SIZ];
int charsize; /* size of utf8 char in bytes */
long unicodep;
wchar_t unicodep;
int ret;
/* append read bytes to unprocessed bytes */
@ -1257,8 +1263,7 @@ ttyread(void) {
buflen += ret;
ptr = buf;
while((charsize = utf8decode(ptr, &unicodep, buflen))) {
utf8encode(unicodep, s, UTF_SIZ);
tputc(s, charsize);
tputc(unicodep);
ptr += charsize;
buflen -= charsize;
}
@ -1275,9 +1280,17 @@ ttywrite(const char *s, size_t n) {
void
ttysend(char *s, size_t n) {
int len;
wchar_t u;
ttywrite(s, n);
if(IS_SET(MODE_ECHO))
techo(s, n);
if(IS_SET(MODE_ECHO)) {
while ((len = utf8decode(s, &u, n)) != 0) {
techo(u);
n -= len;
s += len;
}
}
}
void
@ -1527,7 +1540,7 @@ tmoveto(int x, int y) {
}
void
tsetchar(char *c, Glyph *attr, int x, int y) {
tsetchar(wchar_t u, Glyph *attr, int x, int y) {
static char *vt100_0[62] = { /* 0x41 - 0x7e */
"", "", "", "", "", "", "", /* A - G */
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
@ -1538,16 +1551,21 @@ tsetchar(char *c, Glyph *attr, int x, int y) {
"", "", "", "", "", "", "", "", /* p - w */
"", "", "", "π", "", "£", "·", /* x - ~ */
};
char c[UTF_SIZ];
c[0] = '\0';
/*
* The table is proudly stolen from rxvt.
*/
if(term.trantbl[term.charset] == CS_GRAPHIC0) {
if(BETWEEN(c[0], 0x41, 0x7e) && vt100_0[c[0] - 0x41]) {
c = vt100_0[c[0] - 0x41];
if(BETWEEN(u, 0x41, 0x7e) && vt100_0[u - 0x41]) {
strcpy(c, vt100_0[u - 0x41]);
}
}
if (c[0] == '\0')
utf8encode(u, c);
if(term.line[y][x].mode & ATTR_WIDE) {
if(x+1 < term.col) {
term.line[y][x+1].c[0] = ' ';
@ -2248,7 +2266,13 @@ strreset(void) {
}
void
tprinter(char *s, size_t len) {
tprinter(wchar_t u) {
size_t len;
char s[UTF_SIZ];
if((len = utf8encode(u, s)) == 0)
return;
if(iofd != -1 && xwrite(iofd, s, len) < 0) {
fprintf(stderr, "Error writing in %s:%s\n",
opt_io, strerror(errno));
@ -2275,24 +2299,34 @@ printsel(const Arg *arg) {
void
tdumpsel(void) {
char *ptr;
size_t len;
wchar_t u;
if((ptr = getsel())) {
tprinter(ptr, strlen(ptr));
free(ptr);
if((ptr = getsel()) == NULL)
return;
while((len = utf8decode(ptr, &u, UTF_SIZ))) {
tprinter(u);
ptr += len;
}
free(ptr);
}
void
tdumpline(int n) {
Glyph *bp, *end;
wchar_t u;
bp = &term.line[n][0];
end = &bp[MIN(tlinelen(n), term.col) - 1];
if(bp != end || bp->c[0] != ' ') {
for( ;bp <= end; ++bp)
tprinter(bp->c, utf8len(bp->c));
for( ;bp <= end; ++bp) {
if(!utf8decode(bp->c, &u, UTF_SIZ))
break;
tprinter(u);
}
}
tprinter("\n", 1);
tprinter('\n');
}
void
@ -2320,26 +2354,18 @@ tputtab(int n) {
}
void
techo(char *buf, int len) {
for(; len > 0; buf++, len--) {
char c = *buf;
if(ISCONTROL((uchar) c)) { /* control code */
if(c & 0x80) {
c &= 0x7f;
tputc("^", 1);
tputc("[", 1);
} else if(c != '\n' && c != '\r' && c != '\t') {
c ^= 0x40;
tputc("^", 1);
}
tputc(&c, 1);
} else {
break;
techo(wchar_t u) {
if(ISCONTROL(u)) { /* control code */
if(u & 0x80) {
u &= 0x7f;
tputc('^');
tputc('[');
} else if(u != '\n' && u != '\r' && u != '\t') {
u ^= 0x40;
tputc('^');
}
}
if(len)
tputc(buf, len);
tputc(u);
}
void
@ -2357,13 +2383,12 @@ tdeftran(char ascii) {
void
tdectest(char c) {
static char E[UTF_SIZ] = "E";
int x, y;
if(c == '8') { /* DEC screen alignment test. */
for(x = 0; x < term.col; ++x) {
for(y = 0; y < term.row; ++y)
tsetchar(E, &term.c.attr, x, y);
tsetchar('E', &term.c.attr, x, y);
}
}
}
@ -2394,7 +2419,6 @@ tstrsequence(uchar c) {
void
tcontrolcode(uchar ascii) {
static char question[UTF_SIZ] = "?";
switch(ascii) {
case '\t': /* HT */
@ -2435,7 +2459,7 @@ tcontrolcode(uchar ascii) {
term.charset = 1;
return;
case '\032': /* SUB */
tsetchar(question, &term.c.attr, term.c.x, term.c.y);
tsetchar('?', &term.c.attr, term.c.x, term.c.y);
case '\030': /* CAN */
csireset();
break;
@ -2556,26 +2580,20 @@ eschandle(uchar ascii) {
}
void
tputc(char *c, int len) {
uchar ascii;
tputc(wchar_t u) {
char s[UTF_SIZ];
int len;
bool control;
long unicodep;
int width;
Glyph *gp;
if(len == 1) {
width = 1;
unicodep = ascii = *c;
} else {
utf8decode(c, &unicodep, UTF_SIZ);
width = wcwidth(unicodep);
control = ISCONTROLC1(unicodep);
ascii = unicodep;
}
width = wcwidth(u);
control = ISCONTROLC1(u);
if(IS_SET(MODE_PRINT))
tprinter(c, len);
control = ISCONTROL(unicodep);
tprinter(u);
control = ISCONTROL(u);
/*
* STR sequence must be checked before anything else
@ -2584,31 +2602,33 @@ tputc(char *c, int len) {
* character.
*/
if(term.esc & ESC_STR) {
if(width == 1 &&
(ascii == '\a' || ascii == 030 ||
ascii == 032 || ascii == 033 ||
ISCONTROLC1(unicodep))) {
if(u == '\a' || u == 030 || u == 032 || u == 033 ||
ISCONTROLC1(u)) {
term.esc &= ~(ESC_START|ESC_STR);
term.esc |= ESC_STR_END;
} else if(strescseq.len + len < sizeof(strescseq.buf) - 1) {
memmove(&strescseq.buf[strescseq.len], c, len);
strescseq.len += len;
return;
} else {
/*
* Here is a bug in terminals. If the user never sends
* some code to stop the str or esc command, then st
* will stop responding. But this is better than
* silently failing with unknown characters. At least
* then users will report back.
*
* In the case users ever get fixed, here is the code:
*/
/*
* term.esc = 0;
* strhandle();
*/
return;
/* TODO: make csiescseq.buf buffer of wchar_t */
len = utf8encode(u, s);
if(strescseq.len + len < sizeof(strescseq.buf) - 1) {
memmove(&strescseq.buf[strescseq.len], s, len);
strescseq.len += len;
return;
} else {
/*
* Here is a bug in terminals. If the user never sends
* some code to stop the str or esc command, then st
* will stop responding. But this is better than
* silently failing with unknown characters. At least
* then users will report back.
*
* In the case users ever get fixed, here is the code:
*/
/*
* term.esc = 0;
* strhandle();
*/
return;
}
}
}
@ -2618,15 +2638,16 @@ tputc(char *c, int len) {
* they must not cause conflicts with sequences.
*/
if(control) {
tcontrolcode(ascii);
tcontrolcode(u);
/*
* control codes are not shown ever
*/
return;
} else if(term.esc & ESC_START) {
if(term.esc & ESC_CSI) {
csiescseq.buf[csiescseq.len++] = ascii;
if(BETWEEN(ascii, 0x40, 0x7E)
/* TODO: make csiescseq.buf buffer of wchar_t */
csiescseq.buf[csiescseq.len++] = u;
if(BETWEEN(u, 0x40, 0x7E)
|| csiescseq.len >= \
sizeof(csiescseq.buf)-1) {
term.esc = 0;
@ -2635,11 +2656,11 @@ tputc(char *c, int len) {
}
return;
} else if(term.esc & ESC_ALTCHARSET) {
tdeftran(ascii);
tdeftran(u);
} else if(term.esc & ESC_TEST) {
tdectest(ascii);
tdectest(u);
} else {
if (!eschandle(ascii))
if (!eschandle(u))
return;
/* sequence already finished */
}
@ -2665,7 +2686,7 @@ tputc(char *c, int len) {
if(term.c.x+width > term.col)
tnewline(1);
tsetchar(c, &term.c.attr, term.c.x, term.c.y);
tsetchar(u, &term.c.attr, term.c.x, term.c.y);
if(width == 2) {
gp->mode |= ATTR_WIDE;
@ -3173,7 +3194,7 @@ xdraws(char *s, Glyph base, int x, int y, int charlen, int bytelen) {
int frcflags;
int u8fl, u8fblen, u8cblen, doesexist;
char *u8c, *u8fs;
long unicodep;
wchar_t unicodep;
Font *font = &dc.font;
FcResult fcres;
FcPattern *fcpattern, *fontpattern;
@ -3546,7 +3567,7 @@ drawregion(int x1, int y1, int x2, int y2) {
Glyph base, new;
char buf[DRAW_BUF_SIZ];
bool ena_sel = sel.ob.x != -1 && sel.alt == IS_SET(MODE_ALTSCREEN);
long unicodep;
wchar_t unicodep;
if(!(xw.state & WIN_VISIBLE))
return;
@ -3734,7 +3755,7 @@ kpress(XEvent *ev) {
if(IS_SET(MODE_8BIT)) {
if(*buf < 0177) {
c = *buf | 0x80;
len = utf8encode(c, buf, UTF_SIZ);
len = utf8encode(c, buf);
}
} else {
buf[1] = buf[0];