% chapter03.tex
% -*- coding: utf-8 -*-
\documentclass[letterpaper]{book}
\input{preamble}
\setcounter{chapter}{2}
\begin{document}
%\chapter{Characters}\label{char}
\chapter{Characters}\label{char}
%Internally, \TeX\ represents characters by their (integer)
%character code. This chapter treats those codes, and the
%commands that have access to them.
Internally, \TeX\ represents characters by their (integer)
character code. This chapter treats those codes, and the
commands that have access to them.
%\label{cschap:char}\label{cschap:chardef}\label{cschap:accent}\label{cschap:uccode}\label{cschap:lccode}\label{cschap:uppercase}\label{cschap:lowercase}\label{cschap:string}\label{cschap:escapechar}
%\begin{inventory}
%\item [\cs{char}]
% Explicit denotation of a character to be typeset.
\label{cschap:char}\label{cschap:chardef}\label{cschap:accent}\label{cschap:uccode}\label{cschap:lccode}\label{cschap:uppercase}\label{cschap:lowercase}\label{cschap:string}\label{cschap:escapechar}
\begin{inventory}
\item [\cs{char}]
Explicit denotation of a character to be typeset.
%\item [\cs{chardef}]
% Define a control sequence to be a synonym for
% a~character code.
\item [\cs{chardef}]
Define a control sequence to be a synonym for
a~character code.
%\item [\cs{accent}]
% Command to place accent characters.
\item [\cs{accent}]
Command to place accent characters.
%\item [\cs{if}]
% Test equality of character codes.
\item [\cs{if}]
Test equality of character codes.
%\item [\cs{ifx}]
% Test equality of both character and category codes.
\item [\cs{ifx}]
Test equality of both character and category codes.
%\item [\cs{let}]
% Define a control sequence to be a synonym of a token.
\item [\cs{let}]
Define a control sequence to be a synonym of a token.
%\item [\cs{uccode}]
% Query or set
% the character code that is the uppercase variant of a given code.
\item [\cs{uccode}]
Query or set
the character code that is the uppercase variant of a given code.
%\item [\cs{lccode}]
% Query or set
% the character code that is the lowercase variant of a given code.
\item [\cs{lccode}]
Query or set
the character code that is the lowercase variant of a given code.
%\item [\cs{uppercase}]
% Convert the \gr{general text} argument to its uppercase form.
\item [\cs{uppercase}]
Convert the \gr{general text} argument to its uppercase form.
%\item [\cs{lowercase}]
% Convert the \gr{general text} argument to its lowercase form.
\item [\cs{lowercase}]
Convert the \gr{general text} argument to its lowercase form.
%\item [\cs{string}]
% Convert a token to a string of one or more characters.
%\item [\cs{escapechar}]
% Number of the character that is to be used
% for the escape character
% when control sequences are being converted
% into character tokens. \IniTeX\ default:~92~(\cs{}).
%\end{inventory}
\item [\cs{string}]
Convert a token to a string of one or more characters.
\item [\cs{escapechar}]
Number of the character that is to be used
for the escape character
when control sequences are being converted
into character tokens. \IniTeX\ default:~92~(\cs{}).
\end{inventory}
%%\point[char:code] Character codes
%\section{Character codes}
%\label{char:code}
%\point[char:code] Character codes
\section{Character codes}
\label{char:code}
%Conceptually it is easiest to think that \TeX\ works with
%characters internally, but in fact
%\TeX\ works with integers: the \indextermsub{character}{codes}.
Conceptually it is easiest to think that \TeX\ works with
characters internally, but in fact
\TeX\ works with integers: the \indextermsub{character}{codes}.
%The way characters are encoded in a computer may differ
%from system to system.
%Therefore \TeX\ uses its own scheme of character codes.
%Any character that is read from a file (or from the user terminal)
%is converted to a character code according to the
%character code table.
%A~category code is then assigned based on this (see Chapter~\ref{mouth}).
%The character code table is based on the 7-bit \ascii{} table
%for numbers under~128 (see Section~\ref{sec:asciitable}).
The way characters are encoded in a computer may differ
from system to system.
Therefore \TeX\ uses its own scheme of character codes.
Any character that is read from a file (or from the user terminal)
is converted to a character code according to the
character code table.
A~category code is then assigned based on this character code (see Chapter~\ref{mouth}).
The character code table is based on the 7-bit \ascii{} table
for numbers under~128 (see Section~\ref{sec:asciitable}).
%There is an explicit conversion between characters
%(better: character tokens)
%and character codes using the left quote (grave, back quote)
%character~\n{`{}}:
%at all places where \TeX\ expects a \gram{number} you
%can use the left quote followed by a character
%token or
%a single-character control sequence.
%Thus both \verb.\count`a. and \verb.\count`\a. are synonyms
%for \verb.\count97.. See also Chapter~\ref{number}.
There is an explicit conversion between characters
(better: character tokens)
and character codes using the left quote (grave, back quote)
character~\n{`{}}:
at all places where \TeX\ expects a \gram{number} you
can use the left quote followed by a character
token or
a single-character control sequence.
Thus both \verb.\count`a. and \verb.\count`\a. are synonyms
for \verb.\count97.. See also Chapter~\ref{number}.
%The possibility of a single-character control
%sequence is necessary in certain cases such as
%\begin{disp}\verb>\catcode`\%=11>\quad or\quad \verb>\def\CommentSign{\char`\%}>\end{disp}
%which would be misunderstood if the backslash were left out.
%For instance
%\begin{verbatim}
%\catcode`%=11
%\end{verbatim}
%would consider
%the \n{=11} to be a comment.
%Single-character
%control sequences can be formed from characters with any
%category code.
The possibility of a single-character control
sequence is necessary in certain cases such as
\begin{disp}\verb>\catcode`\%=11>\quad or\quad \verb>\def\CommentSign{\char`\%}>\end{disp}
which would be misunderstood if the backslash were left out.
For instance, in
\begin{verbatim}
\catcode`%=11
\end{verbatim}
\TeX\ would consider
the \n{=11} to be a comment.
Single-character
control sequences can be formed from characters with any
category code.
%After the conversion to character codes any connection
%with external representations has disappeared. Of course,
%for most characters the visible output will `equal' the input
%(that is, an `\n{a}' causes an~`a').
%There are exceptions, however, even among the common symbols.
%In the Computer Modern
%roman fonts there are no `less than' and `greater than'
%\message{Check <>! Dammit!}%
%signs, so the input `\verb.<>.' will give `<>' in the output.
%%{\MathRMx<>}
After the conversion to character codes any connection
with external representations has disappeared. Of course,
for most characters the visible output will `equal' the input
(that is, an `\n{a}' causes an~`a').
There are exceptions, however, even among the common symbols.
In the Computer Modern
roman fonts there are no `less than' and `greater than'
\message{Check <>! Dammit!}%
signs, so the input `\verb.<>.' will give `\textexclamdown\textquestiondown' in the output.
%{\MathRMx<>}
%In order to make \TeX\ machine independent at the output
%side, the character codes are also used in the \n{dvi} file:
%opcodes $n=0\ldots127$ denote simply the instruction `take
%character $n$ from the current font'. The complete definition
%of the opcodes in a \n{dvi} file can be found in~\cite{Knuth:TeXprogram}.
In order to make \TeX\ machine independent at the output
side, the character codes are also used in the \n{dvi} file:
opcodes $n=0\ldots127$ denote simply the instruction `take
character $n$ from the current font'. The complete definition
of the opcodes in a \n{dvi} file can be found in~\cite{Knuth:TeXprogram}.
%%\point Control sequences for characters
%\section{Control sequences for characters}
%\point Control sequences for characters
\section{Control sequences for characters}
%There are a number of ways in which a control sequence can denote
%a character. The \cs{char} command specifies a character to be
%typeset; the \cs{let} command introduces
%a synonym for a character token, that is,
%the combination of character code and category code.
There are a number of ways in which a control sequence can denote
a character. The \cs{char} command specifies a character to be
typeset; the \cs{let} command introduces
a synonym for a character token, that is,
the combination of character code and category code.
%%\point Denoting characters to be typeset: \cs\char
%\section{Denoting characters to be typeset: \protect\cs{char}}
%\point Denoting characters to be typeset: \cs\char
\subsection{Denoting characters to be typeset: \protect\cs{char}}
%Characters can be denoted numerically by, for example,
%\verb.\char98.\cstoidx char\par.
%This command tells \TeX\ to add character number~98 of the
%current font to the horizontal list currently under construction.
Characters can be denoted numerically by, for example,
\verb.\char98.\cstoidx char\par.
This command tells \TeX\ to add character number~98 of the
current font to the horizontal list currently under construction.
%Instead of decimal notation, it is often more convenient to
%use octal or hexadecimal notation. For octal the single quote is used:
%\verb.\char'142.; hexadecimal uses the double quote: \verb.\char"62..
%Note that \verb.\char''62. is incorrect; the process that replaces
%two quotes by a double quote works at a later stage of processing
%(the visual processor) than number scanning (the execution processor).
Instead of decimal notation, it is often more convenient to
use octal or hexadecimal notation. For octal the single quote is used:
\verb.\char'142.; hexadecimal uses the double quote: \verb.\char"62..
Note that \verb.\char''62. is incorrect; the process that replaces
two quotes by a double quote works at a later stage of processing
(the visual processor) than number scanning (the execution processor).
%Because of the explicit conversion to character codes by the
%back quote character it is also possible to get a `b' \ldash provided
%that you are using a font organized a bit like the \ascii{} table \rdash
%with \verb.\char`b. or \verb.\char`\b..
Because of the explicit conversion to character codes by the
back quote character it is also possible to get a `b' \ldash provided
that you are using a font organized a bit like the \ascii{} table \rdash
with \verb.\char`b. or \verb.\char`\b..
%The \cs{char} command looks superficially a bit like
%the \verb-^^- substitution mechanism (Chapter~\ref{mouth}).
%Both mechanisms access characters without directly denoting them.
%However, the \verb-^^- mechanism operates in a very early stage of
%processing (in the input processor of \TeX,
%but before category code
%assignment); the \cs{char} command, on the other hand,
%comes in the final stages of processing.
%In effect it says `typeset character number
%so-and-so'.
The \cs{char} command looks superficially a bit like
the \verb-^^- substitution mechanism (Chapter~\ref{mouth}).
Both mechanisms access characters without directly denoting them.
However, the \verb-^^- mechanism operates in a very early stage of
processing (in the input processor of \TeX,
but before category code
assignment); the \cs{char} command, on the other hand,
comes in the final stages of processing.
In effect it says `typeset character number
so-and-so'.
%There is a construction to let a control sequence stand
%for some character code: the \csterm chardef\par\ command.
%The syntax of this is \label{chardef}
%\begin{disp}\cs{chardef}\gram{control sequence}\gr{equals}\gram{number},
%\end{disp}
%where the number can be an explicit
%representation or a counter value, but it can also be
%a character code
%obtained using the left quote command (see above;
%the full definition of \gr{number} is given in Chapter~\ref{number}).
%In the plain format
%the latter possibility is used in
%definitions such as
%\begin{verbatim}
%\chardef\%=`\%
%\end{verbatim}
%which could have been given equivalently as
%\begin{verbatim}
%\chardef\%=37
%\end{verbatim}
%After this command, the control symbol \verb>\%>
%used on its own is a synonym for \verb>\char37>,
%that is, the command to typeset character~37
%(usually the per cent character).
There is a construction to let a control sequence stand
for some character code: the \csterm chardef\par\ command.
The syntax of this is \label{chardef}
\begin{disp}\cs{chardef}\gram{control sequence}\gr{equals}\gram{number},
\end{disp}
where the number can be an explicit
representation or a counter value, but it can also be
a character code
obtained using the left quote command (see above;
the full definition of \gr{number} is given in Chapter~\ref{number}).
In the plain format
the latter possibility is used in
definitions such as
\begin{verbatim}
\chardef\%=`\%
\end{verbatim}
which could have been given equivalently as
\begin{verbatim}
\chardef\%=37
\end{verbatim}
After this command, the control symbol \verb>\%>
used on its own is a synonym for \verb>\char37>,
that is, the command to typeset character~37
(usually the per cent character).
%A control sequence that has been defined with a \cs{chardef}
%command can also be used as a \gr{number}.
%This fact is used in allocation commands such as
%\cs{newbox} (see Chapters~\ref{number} and~\ref{alloc}).
%Tokens defined with \cs{mathchardef} can also be used this
%way.
A control sequence that has been defined with a \cs{chardef}
command can also be used as a \gr{number}.
This fact is used in allocation commands such as
\cs{newbox} (see Chapters~\ref{number} and~\ref{alloc}).
Tokens defined with \cs{mathchardef} can also be used this
way.
%\subsection{Implicit character tokens: \protect\cs{let}}
\subsection{Implicit character tokens: \protect\cs{let}}
%Another construction defining a control sequence
%to stand for (among other things)
%a character is~\cs{let}\cstoidx let\par:
%\begin{disp}\cs{let}\gr{control sequence}\gr{equals}\gr{token}\end{disp}
%with a character token on the right hand side of the (optional)
%equals sign. The result is called an \indextermbus{implicit}{character} token.
%(See page~\pageref{let} for a further discussion of~\cs{let}.)
Another construction defining a control sequence
to stand for (among other things)
a character is~\cs{let}\cstoidx let\par:
\begin{disp}\cs{let}\gr{control sequence}\gr{equals}\gr{token}\end{disp}
with a character token on the right hand side of the (optional)
equals sign. The result is called an \indextermbus{implicit}{character} token.
(See page~\pageref{let} for a further discussion of~\cs{let}.)
%In the
%plain format there are for instance synonyms for
%the open and close brace:
%\begin{verbatim}
%\let\bgroup={ \let\egroup=}
%\end{verbatim}
%The resulting control sequences are called `implicit braces'
%(see Chapter~\ref{group}).
In the
plain format there are for instance synonyms for
the open and close brace:
\begin{verbatim}
\let\bgroup={ \let\egroup=}
\end{verbatim}
The resulting control sequences are called `implicit braces'
(see Chapter~\ref{group}).
%Assigning characters by \cs{let}
%is different from defining control sequences by \cs{chardef},
%in the sense that \cs{let}
%makes the control sequence stand for the combination
%of a character code and category code.
Assigning characters by \cs{let}
is different from defining control sequences by \cs{chardef},
in the sense that \cs{let}
makes the control sequence stand for the combination
of a character code and category code.
%As an example
%\begin{verbatim}
%\catcode`|=2 % make the bar an end of group
%\let\b=| % make \b a bar character
%{\def\m{...}\b \m
%\end{verbatim}
%gives an `undefined control sequence \cs{m}'
%because the \cs{b} closed the group inside which \cs{m}
%was defined. On the other hand,
%\begin{verbatim}
%\let\b=| % make \b a bar character
%\catcode`|=2 % make the bar character end of group
%{\def\m{...}\b \m
%\end{verbatim}
%leaves one group open, and it prints a vertical bar
%(or whatever is in position 124 of the current font).
%The first of these examples
%implies that even when the braces have been redefined
%(for instance into active characters for macros that
%format C code) the beginning-of-group and end-of-group
%functionality is available through the control sequences
%\cs{bgroup} and~\cs{egroup}.
As an example
\begin{verbatim}
\catcode`|=2 % make the bar an end of group
\let\b=| % make \b a bar character
{\def\m{...}\b \m
\end{verbatim}
gives an `undefined control sequence \cs{m}'
because the \cs{b} closed the group inside which \cs{m}
was defined. On the other hand,
\begin{verbatim}
\let\b=| % make \b a bar character
\catcode`|=2 % make the bar character end of group
{\def\m{...}\b \m
\end{verbatim}
leaves one group open, and it prints a vertical bar
(or whatever is in position 124 of the current font).
The first of these examples
implies that even when the braces have been redefined
(for instance into active characters for macros that
format C code) the beginning-of-group and end-of-group
functionality is available through the control sequences
\cs{bgroup} and~\cs{egroup}.
%Here is
%another example to show
%that implicit character tokens are hard to distinguish
%from real character tokens. After the above sequence
%\begin{verbatim}
%\catcode`|=2 \let\b=|
%\end{verbatim}
%the tests
%\begin{verbatim}
%\if\b|
%\end{verbatim}
%and
%\begin{verbatim}
%\ifcat\b}
%\end{verbatim}
%are both true.
Here is
another example to show
that implicit character tokens are hard to distinguish
from real character tokens. After the above sequence
\begin{verbatim}
\catcode`|=2 \let\b=|
\end{verbatim}
the tests
\begin{verbatim}
\if\b|
\end{verbatim}
and
\begin{verbatim}
\ifcat\b}
\end{verbatim}
are both true.
%Yet another example can be found in the plain format:
%the commands
%\begin{verbatim}
%\let\sp=^ \let\sb=_
%\end{verbatim}
%allow people without an
%underscore or circumflex on their keyboard to
%make sub- and superscripts in mathematics.
%For instance:
%\begin{disp}\verb>x\sp2\sb{ij}>\quad gives\quad $x\sp2\sb{ij}$\end{disp}
%If a person typing in the format itself does not have
%these keys, some further tricks are needed:\label{spsb:truc}
%\begin{verbatim}
%{\lccode`,=94 \lccode`.=95 \catcode`,=7 \catcode`.=8
%\lowercase{\global\let\sp=, \global\let\sb=.}}
%\end{verbatim}
%will do the job; see below for an explanation of lowercase codes.
%The \verb>^^> method as it was in \TeX\ version~2
%(see page~\pageref{hathat}) cannot be used here,
%as it would require typing two characters that can ordinarily
%not be input.
%With the extension in \TeX\ version~3 it would also be possible
%to write
%\begin{verbatim}
%{\catcode`\,=7
%\global\let\sp=,,5e \global\let\sb=,,5f}
%\end{verbatim}
%denoting the codes 94 and 95 hexadecimally.
Yet another example can be found in the plain format:
the commands
\begin{verbatim}
\let\sp=^ \let\sb=_
\end{verbatim}
allow people without an
underscore or circumflex on their keyboard to
make sub- and superscripts in mathematics.
For instance:
\begin{disp}\verb>x\sp2\sb{ij}>\quad gives\quad $x\sp2\sb{ij}$\end{disp}
If a person typing in the format itself does not have
these keys, some further tricks are needed:\label{spsb:truc}
\begin{verbatim}
{\lccode`,=94 \lccode`.=95 \catcode`,=7 \catcode`.=8
\lowercase{\global\let\sp=, \global\let\sb=.}}
\end{verbatim}
will do the job; see below for an explanation of lowercase codes.
The \verb>^^> method as it was in \TeX\ version~2
(see page~\pageref{hathat}) cannot be used here,
as it would require typing two characters that can ordinarily
not be input.
With the extension in \TeX\ version~3 it would also be possible
to write
\begin{verbatim}
{\catcode`\,=7
\global\let\sp=,,5e \global\let\sb=,,5f}
\end{verbatim}
denoting the codes 94 and 95 hexadecimally.
%Finding out just what a control sequence has been defined to be with
%\cs{let} can be done using \cs{meaning}:
%the sequence
%\begin{verbatim}
%\let\x=3 \meaning\x
%\end{verbatim}
%gives
%`\n{the character 3}'.
Finding out just what a control sequence has been defined to be with
\cs{let} can be done using \cs{meaning}:
the sequence
\begin{verbatim}
\let\x=3 \meaning\x
\end{verbatim}
gives
`\n{the character 3}'.
%%\point Accents
%\section{Accents}
%\point Accents
\section{Accents}
%\emph{Accents}\index{accents} can be placed by the
%\gr{horizontal command}~\csterm accent\par
%\label{character}:
%\begin{disp}\cs{accent}\gr{8-bit number}\gr{optional assignments}%
% \gr{character}\end{disp}
%where \gr{character} is a character of
%category 11\index{category!11} or~12\index{category!12},
%a~\cs{char}\gr{8-bit number} command, or a~\cs{chardef} token. If none
%of these four types of \gr{character} follows, the accent is taken to
%be a \cs{char} command itself; this gives an accent `suspended in
%mid-air'. Otherwise the accent is placed on top of the following
%character. Font changes between the accent and the character can be
%effected by the \gr{optional assignments}.
\emph{Accents}\index{accents} can be placed by the
\gr{horizontal command}~\csterm accent\par
\label{character}:
\begin{disp}\cs{accent}\gr{8-bit number}\gr{optional assignments}%
\gr{character}\end{disp}
where \gr{character} is a character of
category 11\index{category!11} or~12\index{category!12},
a~\cs{char}\gr{8-bit number} command, or a~\cs{chardef} token. If none
of these four types of \gr{character} follows, the accent is taken to
be a \cs{char} command itself; this gives an accent `suspended in
mid-air'. Otherwise the accent is placed on top of the following
character. Font changes between the accent and the character can be
effected by the \gr{optional assignments}.
%An unpleasant implication of the fact that an \cs{accent} command
%has to be followed by a \gr{character} is that it is not
%possible to place an accent on a ligature, or
%two accents on top of each other.
%In some languages, such as Hindi or Vietnamese,
%such double accents do occur.
%Positioning accents on top of each other is possible,
%however, in math mode.
An unpleasant implication of the fact that an \cs{accent} command
has to be followed by a \gr{character} is that it is not
possible to place an accent on a ligature, or
two accents on top of each other.
In some languages, such as Hindi or Vietnamese,
such double accents do occur.
Positioning accents on top of each other is possible,
however, in math mode.
%The width of a character with an accent is the same as that of
%the unaccented character. \TeX\ assumes that the
%accent as it appears in the font file
%is properly positioned for a character that is as high
%as the x-height of the font; for characters with other heights
%it correspondingly lowers or raises the accent.
The width of a character with an accent is the same as that of
the unaccented character. \TeX\ assumes that the
accent as it appears in the font file
is properly positioned for a character that is as high
as the x-height of the font; for characters with other heights
it correspondingly lowers or raises the accent.
%No genuine under-accents exist in \TeX. They are
%implemented as low placed over-accents. A~way of handling
%them more correctly would be to write a macro that
%measures the following character, and raises or drops
%the accent accordingly.
%The cedilla macro, \cs{c}\cstoidx c\par,
%in plain \TeX\ does something along these lines. However,
%it does not drop the accent for characters with descenders.
No genuine under-accents exist in \TeX. They are
implemented as low-placed over-accents. A~way of handling
them more correctly would be to write a macro that
measures the following character, and raises or drops
the accent accordingly.
The cedilla macro, \cs{c}\cstoidx c\par,
in plain \TeX\ does something along these lines. However,
it does not drop the accent for characters with descenders.
%The horizontal positioning of an accent is controlled by
%\cs{fontdimen1}, \indextermsub{slant}{per point}. Kerns are used
%for the horizontal movement. Note that, although they
%are inserted automatically, these kerns are classified
%as {\italic explicit\/} kerns. Therefore they inhibit hyphenation
%in the parts of the word before and after the kern.
The horizontal positioning of an accent is controlled by
\cs{fontdimen1}, \indextermsub{slant}{per point}. Kerns are used
for the horizontal movement. Note that, although they
are inserted automatically, these kerns are classified
as {\italic explicit\/} kerns. Therefore they inhibit hyphenation
in the parts of the word before and after the kern.
%As an example of kerning for accents,
%here follows the dump of a horizontal list.
%\message{maybe italic correction for extra line}
%\begin{verbatim}
%\setbox0=\hbox{\it \`l}
%\showbox0
%\end{verbatim}
%gives
%\begin{verbatim}
%\hbox(9.58334+0.0)x2.55554
%.\kern -0.61803 (for accent)
%.\hbox(6.94444+0.0)x5.11108, shifted -2.6389
%..\tenit ^^R
%.\kern -4.49306 (for accent)
%.\tenit l
%\end{verbatim}
%Note that the accent is placed first, so afterwards the italic
%correction of the last character is still available.
As an example of kerning for accents,
here follows the dump of a horizontal list.
\message{maybe italic correction for extra line}
\begin{verbatim}
\setbox0=\hbox{\it \`l}
\showbox0
\end{verbatim}
gives
\begin{verbatim}
\hbox(9.58334+0.0)x2.55554
.\kern -0.61803 (for accent)
.\hbox(6.94444+0.0)x5.11108, shifted -2.6389
..\tenit ^^R
.\kern -4.49306 (for accent)
.\tenit l
\end{verbatim}
Note that the accent is placed first, so afterwards the italic
correction of the last character is still available.
%\section{Testing characters}
\section{Testing characters}
%Equality of character codes is tested by \cs{if}:
%\begin{disp}\cs{if}\gr{token$_1$}\gr{token$_2$}\end{disp}
%Tokens following this conditional are expanded until two
%unexpandable tokens are left. The condition is then true
%if those tokens are character tokens with the same character
%code, regardless of category code.
Equality of character codes is tested by \cs{if}:
\begin{disp}\cs{if}\gr{token$_1$}\gr{token$_2$}\end{disp}
Tokens following this conditional are expanded until two
unexpandable tokens are left. The condition is then true
if those tokens are character tokens with the same character
code, regardless of category code.
%An unexpandable control
%sequence is considered to have character code 256 and
%category code~16\index{category!16}
%(so that it is unequal to anything except
%another control sequence), except in the case
%where it had been \cs{let} to a non-active character token.
%In that case it is considered to have the character code
%and category code of that character. This was mentioned above.
An unexpandable control
sequence is considered to have character code 256 and
category code~16\index{category!16}
(so that it is unequal to anything except
another control sequence), except in the case
where it has been \cs{let} to a non-active character token.
In that case it is considered to have the character code
and category code of that character. This was mentioned above.
%The test \cs{ifcat} for category codes was mentioned
%in Chapter~\ref{mouth}; the test
%\begin{disp}\cs{ifx}\gr{token$_1$}\gr{token$_2$}\end{disp}
%can be used to test for category code and character code
%simultaneously.
%The tokens following this test are not expanded.
%However, if they are macros, \TeX\
%tests their expansions for equality.
The test \cs{ifcat} for category codes was mentioned
in Chapter~\ref{mouth}; the test
\begin{disp}\cs{ifx}\gr{token$_1$}\gr{token$_2$}\end{disp}
can be used to test for category code and character code
simultaneously.
The tokens following this test are not expanded.
However, if they are macros, \TeX\
tests their expansions for equality.
%Quantities defined by \cs{chardef} can be tested with
%\cs{ifnum}:
%\begin{verbatim}
%\chardef\a=`x \chardef\b=`y \ifnum\a=\b % is false
%\end{verbatim}
%based on the fact (see Chapter~\ref{number}) that
%\gr{chardef token}s can be used as numbers.
Quantities defined by \cs{chardef} can be tested with
\cs{ifnum}:
\begin{verbatim}
\chardef\a=`x \chardef\b=`y \ifnum\a=\b % is false
\end{verbatim}
based on the fact (see Chapter~\ref{number}) that
\gr{chardef token}s can be used as numbers.
%See also section~\ref{sec:charactertests}
See also section~\ref{sec:charactertests}.
%\section{Uppercase and lowercase}
\section{Uppercase and lowercase}
%%\spoint[uc/lc] Uppercase and lowercase codes
%\subsection{Uppercase and lowercase codes}
%\label{uc/lc}
%\spoint[uc/lc] Uppercase and lowercase codes
\subsection{Uppercase and lowercase codes}
\label{uc/lc}
%To each of the character codes correspond\cstoidx lccode\par\cstoidx uccode\par
%an \indextermsub{uppercase}{code}\index{code!uppercase|see{uppercase, code}}
%and a \indextermsub{lowercase}{code}\index{code!lowercase|see{lowercase, code}}
%(for still more codes see below).
%These can be assigned
%by
%\begin{Disp}\cs{uccode}\gram{number}\gr{equals}\gram{number}\end{Disp}
%and
%\begin{Disp}\cs{lccode}\gram{number}\gr{equals}\gram{number}.\end{Disp}
%In \IniTeX\ codes \verb-`a..`z-, \verb-`A..`Z- have uppercase code
%\label{ini:uclc}
%\verb-`A..`Z- and lowercase code \verb-`a..`z-.
%All other character codes have both uppercase and lowercase
%code zero.
To each of the character codes correspond\cstoidx lccode\par\cstoidx uccode\par
an \indextermsub{uppercase}{code}\index{code!uppercase|see{uppercase, code}}
and a \indextermsub{lowercase}{code}\index{code!lowercase|see{lowercase, code}}
(for still more codes see below).
These can be assigned
by
\begin{Disp}\cs{uccode}\gram{number}\gr{equals}\gram{number}\end{Disp}
and
\begin{Disp}\cs{lccode}\gram{number}\gr{equals}\gram{number}.\end{Disp}
In \IniTeX\ codes \verb-`a..`z-, \verb-`A..`Z- have uppercase code
\label{ini:uclc}
\verb-`A..`Z- and lowercase code \verb-`a..`z-.
All other character codes have both uppercase and lowercase
code zero.
%%\spoint[upcase] Uppercase and lowercase commands
%\subsection{Uppercase and lowercase commands}
%\label{upcase}
%\spoint[upcase] Uppercase and lowercase commands
\subsection{Uppercase and lowercase commands}
\label{upcase}
%The commands \verb-\uppercase{...}- and \verb-\lowercase{...}-
%\cstoidx uppercase\par\cstoidx lowercase\par
%go through their argument lists, replacing all character
%codes of explicit character tokens
%by their uppercase and lowercase code respectively
%if these are non-zero,
%without changing the category codes.
The commands \verb-\uppercase{...}- and \verb-\lowercase{...}-
\cstoidx uppercase\par\cstoidx lowercase\par
go through their argument lists, replacing all character
codes of explicit character tokens
by their uppercase and lowercase code respectively
if these are non-zero,
without changing the category codes.
%The argument of \cs{uppercase} and \cs{lowercase}
%is a \gr{general text}, which is defined as
%\begin{Disp} \gr{general text} $\longrightarrow$ \gr{filler}\lb
% \gr{balanced text}\gr{right brace}\end{Disp}
%(for the definition of \gr{filler} see Chapter~\ref{gramm})
%meaning that the left brace can be implicit, but the closing
%right brace must be an explicit character token with category
%code~2. \TeX\ performs expansion to find the opening
%brace.
The argument of \cs{uppercase} and \cs{lowercase}
is a \gr{general text}, which is defined as
\begin{Disp} \gr{general text} $\longrightarrow$ \gr{filler}\lb
\gr{balanced text}\gr{right brace}\end{Disp}
(for the definition of \gr{filler} see Chapter~\ref{gramm})
meaning that the left brace can be implicit, but the closing
right brace must be an explicit character token with category
code~2. \TeX\ performs expansion to find the opening
brace.
%Uppercasing and lowercasing are executed in the execution processor;
%they are not `macro expansion' activities
%like \cs{number} or \cs{string}.
%The sequence (attempting to produce~\cs{A})
%\begin{verbatim}
%\expandafter\csname\uppercase{a}\endcsname
%\end{verbatim}
%gives an error (\TeX\ inserts an \cs{endcsname} before the
%\cs{uppercase} because \cs{uppercase} is unexpandable), but
%\begin{verbatim}
%\uppercase{\csname a\endcsname}
%\end{verbatim}
%works.
Uppercasing and lowercasing are executed in the execution processor;
they are not `macro expansion' activities
like \cs{number} or \cs{string}.
The sequence (attempting to produce~\cs{A})
\begin{verbatim}
\expandafter\csname\uppercase{a}\endcsname
\end{verbatim}
gives an error (\TeX\ inserts an \cs{endcsname} before the
\cs{uppercase} because \cs{uppercase} is unexpandable), but
\begin{verbatim}
\uppercase{\csname a\endcsname}
\end{verbatim}
works.
%As an example of the correct use of \cs{uppercase}, here
%is a macro that tests if a character is uppercase:
%\begin{verbatim}
%\def\ifIsUppercase#1{\uppercase{\if#1}#1}
%\end{verbatim}
%The same test can be
%performed by \verb>\ifnum`#1=\uccode`#1>.
As an example of the correct use of \cs{uppercase}, here
is a macro that tests if a character is uppercase:
\begin{verbatim}
\def\ifIsUppercase#1{\uppercase{\if#1}#1}
\end{verbatim}
The same test can be
performed by \verb>\ifnum`#1=\uccode`#1>.
%Hyphenation of words starting with an uppercase character,
%that is, a character not equal to its own \cs{lccode},
%is subject to the \cs{uchyph} parameter: if this
%is positive, hyphenation of capitalized words is allowed.
%See also Chapter~\ref{line:break}.
Hyphenation of words starting with an uppercase character,
that is, a character not equal to its own \cs{lccode},
is subject to the \cs{uchyph} parameter: if this
is positive, hyphenation of capitalized words is allowed.
See also Chapter~\ref{line:break}.
%%\spoint Uppercase and lowercase forms of keywords
%\subsection{Uppercase and lowercase forms of keywords}
%\spoint Uppercase and lowercase forms of keywords
\subsection{Uppercase and lowercase forms of keywords}
%Each character in \TeX\ keywords, such as \n{pt}, can be
%given in uppercase or lowercase form.
%For instance, \n{pT}, \n{Pt}, \n{pt}, and~\n{PT} all have
%the same meaning. \TeX\ does not use
%the \cs{uccode} and \cs{lccode} tables here to
%determine the lowercase form. Instead it
%converts uppercase characters to lowercase by adding~32
%\ldash the \ascii{} difference between uppercase and lowercase
%characters \rdash to their character code. This has some implications
%for implementations of \TeX\ for non-roman alphabets;
%see page 370 of \TeXbook, \cite{Knuth:TeXbook}.
Each character in \TeX\ keywords, such as \n{pt}, can be
given in uppercase or lowercase form.
For instance, \n{pT}, \n{Pt}, \n{pt}, and~\n{PT} all have
the same meaning. \TeX\ does not use
the \cs{uccode} and \cs{lccode} tables here to
determine the lowercase form. Instead it
converts uppercase characters to lowercase by adding~32
\ldash the \ascii{} difference between uppercase and lowercase
characters \rdash to their character code. This has some implications
for implementations of \TeX\ for non-roman alphabets;
see page~370 of \TeXbook, \cite{Knuth:TeXbook}.
%\subsection{Creative use of \cs{uppercase} and \cs{lowercase}}
\subsection{Creative use of \cs{uppercase} and \cs{lowercase}}
%The fact that \cs{uppercase} and \cs{lowercase} do not change
%category codes can sometimes be used to create certain
%character-code--category-code combinations that would
%otherwise be difficult to produce. See for instance the
%explanation of the \cs{newif} macro in Chapter~\ref{if},
%and another example on page~\pageref{spsb:truc}.
The fact that \cs{uppercase} and \cs{lowercase} do not change
category codes can sometimes be used to create certain
character-code--category-code combinations that would
otherwise be difficult to produce. See for instance the
explanation of the \cs{newif} macro in Chapter~\ref{if},
and another example on page~\pageref{spsb:truc}.
%For a slightly different application, consider the
%problem (solved by Rainer Sch\"opf) of,
%given a counter \verb-\newcount\mycount-, writing character
%number \verb-\mycount- to the terminal.
%Here is a solution:
%%\begin{verbatim}
%%\lccode`a=\mycount \chardef\terminal=16
%%\lowercase{\write\terminal{a}}
%%\end{verbatim}
%\begin{verbatim}
%\lccode`a=\mycount \chardef\terminal=16
%\end{verbatim}
%\begin{verbatim}
%\lowercase{\write\terminal{a}}
%\end{verbatim}
%The \cs{lowercase} command effectively changes the
%argument of the \cs{write} command from~`\n a'
%into whatever it should be.
For a slightly different application, consider the
problem (solved by Rainer Sch\"opf) of,
given a counter \verb-\newcount\mycount-, writing character
number \verb-\mycount- to the terminal.
Here is a solution:
%\begin{verbatim}
%\lccode`a=\mycount \chardef\terminal=16
%\lowercase{\write\terminal{a}}
%\end{verbatim}
\begin{verbatim}
\lccode`a=\mycount \chardef\terminal=16
\end{verbatim}
\begin{verbatim}
\lowercase{\write\terminal{a}}
\end{verbatim}
The \cs{lowercase} command effectively changes the
argument of the \cs{write} command from~`\n a'
into whatever it should be.
%%\point[codename] Codes of a character
%\section{Codes of a character}
%\label{codename}
%\point[codename] Codes of a character
\section{Codes of a character}
\label{codename}
%Each character code has a number of \gr{codename}s
%associated\indexterm{codenames}
%with it. These are integers in various ranges that determine
%how the character is treated in various contexts, or
%how the occurrence of that character changes the workings
%of \TeX\ in certain contexts.
Each character code has a number of \gr{codename}s
associated\indexterm{codenames}
with it. These are integers in various ranges that determine
how the character is treated in various contexts, or
how the occurrence of that character changes the workings
of \TeX\ in certain contexts.
%The code names are as follows:
%\begin{description}\item [\cs{catcode}]
%\gr{4-bit number} (0--15); the category to which a character belongs.
%This is treated in Chapter~\ref{mouth}.
%\item [\cs{mathcode}]
%\gr{15-bit number} (0--\verb-"7FFF-) or \verb-"8000-;
%determines how a character is treated
%in math mode. See Chapter~\ref{mathchar}.
%\item [\cs{delcode}]
%\gr{27-bit number} (0--\n{\hex7$\,$FFF$\,$FFF});
%determines how a character is treated after
%\cs{left} or \cs{right} in math mode.
%See page~\pageref{delcodes}.
%\item [\cs{sfcode}]
%integer; determines how spacing is affected after this character.
%See Chapter~\ref{space}.
%\item [\cs{lccode}, \cs{uccode}]
%\gr{8-bit number} (0-255); lowercase and
%uppercase codes \rdash these were treated above.
%\end{description}
The code names are as follows:
\begin{description}\item [\cs{catcode}]
\gr{4-bit number} (0--15); the category to which a character belongs.
This is treated in Chapter~\ref{mouth}.
\item [\cs{mathcode}]
\gr{15-bit number} (0--\verb-"7FFF-) or \verb-"8000-;