Autor
| c compiler comparison
| ARTRAG msx master Mensajes: 1802 | Publicado: Marzo 01 2007, 18:09   | Hitech C CPM: 2086 ints!!!!!
I modified the code posted here and I linked it with the Hitech C cross compiler
this is the code
; Sieve benchmark as emitted by the HI-TECH C cross compiler, with hand-added
; accesses to address -866 (0xFC9E as a 16-bit address; the C source later in
; this thread names that address JIFFY).
;
; Locals, addressed relative to IX:
;   (ix-10) iter   pass counter, starts at 1
;   (ix-8)  count  primes found in the current pass
;   (ix-6)  prime  i + i + 3
;   (ix-4)  k      index of the multiple being cleared
;   (ix-2)  i      sieve index
; ncsv, cret and wrelop are external runtime routines not shown here.
; NOTE(review): presumably ncsv builds the IX frame from the defw that follows
; the call (f31 = -10) and wrelop compares HL with DE -- confirm against the
; HI-TECH runtime.
psect text
global _main
_main:
; Store 0 at address -866 before the frame is set up.
ld hl,0
ld (-866),hl
global ncsv, cret, indir
call ncsv
defw f31
global _printf
; printf(string at 19), then drop its one-word argument from the stack.
ld hl,19f
push hl
call _printf
ld hl,2
add hl,sp
ld sp,hl
; iter = 1, then jump to the outer loop test.
ld (ix+-10),.low.1
ld (ix+1+-10),.high.1
jp l6
l3:
; Start of one pass: count = 0, i = 0.
ld (ix+-8),.low.0
ld (ix+1+-8),.high.0
ld (ix+-2),.low.0
ld (ix+1+-2),.high.0
jp l10
l7:
; flags[i] = 1
global _flags
ld e,(ix+-2)
ld d,(ix+1+-2)
ld hl,_flags
add hl,de
ld (hl),1
l9:
; i++
ld l,(ix+-2)
ld h,(ix+1+-2)
inc hl
ld (ix+-2),l
ld (ix+1+-2),h
l10:
; Fill-loop test: HL = 8190, DE = i; loop again on "age".
ld e,(ix+-2)
ld d,(ix+1+-2)
ld hl,8190
global wrelop
call wrelop
jp age,l7
l8:
; i = 0 for the scan loop.
ld (ix+-2),.low.0
ld (ix+1+-2),.high.0
jp l14
l11:
; Skip to l15 when flags[i] is zero.
ld e,(ix+-2)
ld d,(ix+1+-2)
ld hl,_flags
add hl,de
ld a,(hl)
or a
jp az,l15
; prime = i + i + 3
ld e,(ix+-2)
ld d,(ix+1+-2)
ld l,(ix+-2)
ld h,(ix+1+-2)
add hl,de
inc hl
inc hl
inc hl
ld (ix+-6),l
ld (ix+1+-6),h
; k = i + prime
ld e,(ix+-6)
ld d,(ix+1+-6)
ld l,(ix+-2)
ld h,(ix+1+-2)
add hl,de
ld (ix+-4),l
ld (ix+1+-4),h
jp l19
l16:
; flags[k] = 0
ld e,(ix+-4)
ld d,(ix+1+-4)
ld hl,_flags
add hl,de
ld (hl),0
l18:
; k += prime
ld e,(ix+-6)
ld d,(ix+1+-6)
ld l,(ix+-4)
ld h,(ix+1+-4)
add hl,de
ld (ix+-4),l
ld (ix+1+-4),h
l19:
; Inner-loop test: HL = 8190, DE = k; loop again on "age".
ld e,(ix+-4)
ld d,(ix+1+-4)
ld hl,8190
global wrelop
call wrelop
jp age,l16
l17:
; count++
ld l,(ix+-8)
ld h,(ix+1+-8)
inc hl
ld (ix+-8),l
ld (ix+1+-8),h
l15:
l13:
; i++
ld l,(ix+-2)
ld h,(ix+1+-2)
inc hl
ld (ix+-2),l
ld (ix+1+-2),h
l14:
; Scan-loop test: HL = 8190, DE = i; loop again on "age".
ld e,(ix+-2)
ld d,(ix+1+-2)
ld hl,8190
global wrelop
call wrelop
jp age,l11
l12:
l5:
; iter++
ld l,(ix+-10)
ld h,(ix+1+-10)
inc hl
ld (ix+-10),l
ld (ix+1+-10),h
l6:
; Outer-loop test: HL = 10, DE = iter; run another pass on "age".
ld e,(ix+-10)
ld d,(ix+1+-10)
ld hl,10
global wrelop
call wrelop
jp age,l3
l4:
; printf(string at 29, count)
ld l,(ix+-8)
ld h,(ix+1+-8)
push hl
ld hl,29f
push hl
call _printf
; printf(u39, word read from address -866)
ld hl,(-866)
push hl
ld hl,u39
push hl
call _printf
; Only 4 of the 8 argument bytes pushed above are removed here; the exit goes
; through cret. NOTE(review): presumably cret restores SP from the frame.
ld hl,2+2
add hl,sp
ld sp,hl
l2:
jp cret
f31 equ -10
psect data
; "10 iterations\n" as byte values, NUL terminated.
19:
defb 49,48,32,105,116,101,114,97,116,105,111,110,115,10,0
; "%d primes.\n" as byte values, NUL terminated.
29:
defb 37,100,32,112,114,105,109,101,115,46,10,0
u39:
defb "%d ints.",10,0
psect bss
; Sieve flag array, 8191 bytes (indices 0..8190).
_flags:
defs 8191
| | ARTRAG msx master Mensajes: 1802 | Publicado: Marzo 02 2007, 00:02   | SDCC (Small Devices C Compiler)
It takes 1050 ints at 60Hz!!
I have modified the ASM code posted here in order to assemble it with Hitech C cross compiler libraries
This is the result
; Sieve benchmark as emitted by SDCC, adapted by the poster to assemble and
; link against the HI-TECH C libraries, with hand-added accesses to address
; -866 (0xFC9E as a 16-bit address; the C source later in this thread names
; that address JIFFY).
;
; Frame: IX = SP on entry (after push ix), 8 bytes of locals below it:
;   (ix-8),(ix-7)  iter   pass counter, starts at 1
;   (ix-6),(ix-5)  count  primes found in the current pass
;   (ix-4),(ix-3)  prime  2*i + 3
;   (ix-2),(ix-1)  i      sieve index for the scan loop
; DE holds i during the fill loop; BC holds k during the inner loop.
; Every loop test subtracts the loop variable from the limit and branches on
; the sign flag (jp M) without looking at the overflow flag.
psect text
global _main,_printf
_main:
; Store 0 at address -866.
ld hl,0
ld (-866),hl
; Build the frame and reserve 8 bytes of locals.
push ix
ld ix,0
add ix,sp
ld hl,-8
add hl,sp
ld sp,hl
; printf(__str_0); pop af discards the argument.
ld hl,__str_0
push hl
call _printf
pop af
; iter = 1
ld (ix-8),0x01
ld (ix-7),0x00
; Outer test: leave the loop when 10 - iter is negative.
_00115$: ld a,0x0A
sub (ix+-8)
ld a,0x00
sbc a,(ix+-7)
jp M,_00118$
; Fill loop: for DE = 0 while 0x1FFE - DE is not negative, flags[DE] = 1.
ld de,0x0000
_00103$: ld a,0xFE
sub e
ld a,0x1F
sbc a,d
jp M,_00106$
ld hl,_flags
add hl,de
ld c,l
ld b,h
ld a,0x01
ld (bc),a
inc de
jp _00103$
; count = 0, i = 0
_00106$: ld (ix-6),0x00
ld (ix-5),0x00
ld (ix-2),0x00
ld (ix-1),0x00
; Scan test: leave the loop when 0x1FFE - i is negative.
_00111$: ld a,0xFE
sub (ix-2)
ld a,0x1F
sbc a,(ix-1)
jp M,_00131$
; BC = _flags + i; skip this index when flags[i] is zero.
ld a,_flags & 255 ;<
add a,(ix-2)
ld c,a
ld a,_flags /256 ;>
adc a,(ix-1)
ld b,a
ld a,(bc)
or a
jr Z,_00113$
; prime = (i << 1) + 3
ld c,(ix-2)
ld b,(ix-1)
sla c
rl b
ld a,c
add a,0x03
ld (ix-4),a
ld a,b
adc a,0x00
ld (ix-3),a
; BC = k = i + prime
ld a,(ix-2)
add a,(ix-4)
ld c,a
ld a,(ix-1)
adc a,(ix-3)
ld b,a
; Inner test: leave the loop when 0x1FFE - k is negative.
_00107$: ld a,0xFE
sub c
ld a,0x1F
sbc a,b
jp M,_00110$
; flags[k] = 0
ld hl,_flags
add hl,bc
ld e,l
ld d,h
ld a,0x00
ld (de),a
; k += prime
ld a,c
add a,(ix-4)
ld c,a
ld a,b
adc a,(ix-3)
ld b,a
jp _00107$
; count++ (the high byte is bumped when the low byte wraps to zero)
_00110$: inc (ix-6)
jr NZ,_00135$
inc (ix-5)
_00135$:
; i++
_00113$: inc (ix-2)
jr NZ,_00136$
inc (ix-1)
_00136$: jp _00111$
; End of pass: BC = count (used by the push at _00118$), then iter++.
_00131$: ld c,(ix-6)
ld b,(ix-5)
inc (ix-8)
jr NZ,_00137$
inc (ix-7)
_00137$: jp _00115$
; printf(__str_1, count); only one of the two pushed words is popped here,
; the final "ld sp,ix" restores the stack pointer anyway.
_00118$: push bc
ld hl,__str_1
push hl
call _printf
pop af
; printf(u39, word read from address -866)
ld hl,(-866)
push hl
ld hl,u39
push hl
call _printf
pop af
pop af
; Tear down the frame and return.
ld sp,ix
pop ix
ret
psect data
__str_0:
defb "10 iterations"
db 0x0A
db 0x00
__str_1:
defb "%d primes."
db 0x0A
db 0x00
u39:
defb "%d ints.",10,0
psect bss
; Sieve flag array, 8191 bytes (indices 0..8190).
_flags:
defs 8191
| | MicroTech msx lover Mensajes: 123 | Publicado: Marzo 02 2007, 11:07   |
#define JIFFY 0xFC9E /* address read as a size_t timer sample before and after the timed loop */
#define TRUE 1
#define FALSE 0
#define SIZE 8190 /* highest sieve index; flags[] has SIZE+1 entries */
char flags[SIZE+1];
size_t start0, stop0; /* timer samples taken around the ten passes */
/*
 * Sieve benchmark: runs the sieve ten times over flags[0..SIZE] and prints
 * the number of primes found in the last pass and the difference between the
 * two timer samples. Only the ten passes are timed; both printf calls that
 * report results run after stop0 is taken.
 *
 * opldir() is a compiler-library routine not defined here; the accompanying
 * post describes opldir(p1, p2, p3) as "ld hl,p2 / ld de,p1 / ld bc,p3 / ldir",
 * which propagates flags[0] through the rest of the array.
 */
void main()
{
    unsigned int i,k;
    unsigned int prime,count;
    char iter;
    printf("10 iterations\n");
    start0 = *((size_t *) JIFFY);
    for (iter = 10; iter--; ) /* do program 10 times */
    {
        count = 0; /* initialize prime counter */
        flags[0] = TRUE;
        opldir(&flags[1], &flags[0], (sizeof(flags) / sizeof(flags[0])) - 1);
        for (i = 0; i <= SIZE; i++)
        {
            if (flags[i]) /* found a prime */
            {
                prime = i + i + 3; /* twice index + 3 */
                for (k = i + prime; k <= SIZE; k += prime)
                    flags[k] = FALSE; /* kill all multiples */
                count++; /* primes found */
            }
        }
    }
    stop0 = *((size_t *) JIFFY);
    /* count and the timer difference are unsigned, so print them with %u;
       the cast keeps the argument type matched to %u whatever size_t is. */
    printf("%u primes.\n",count); /*primes found in 10th pass */
    printf("timer 0 = %u\n", (unsigned)(stop0 - start0));
}
With some simple "tricks" performance can increase, in this version:
1) only "essential" time is measured: printf are outside the "profiled" code
2) use unsigned instead of signed whenever possible
3) initialization of flags can be obtained by initializing only the first element and then copying it on all the subsequent elements:
opldir(param1, param2, param3)
is equivalent to:
ld hl,param2
ld de,param1
ld bc,param3
ldir
4) iter is char (8bit) instead of int (16 bit): better using only the type you really need
I got the following:
tR: 100 ints = approx. 1,66 secs
2+: 697 ints = approx 11,61 secs
| | ARTRAG msx master Mensajes: 1802 | Publicado: Marzo 02 2007, 14:55   | @ Microtech
great result!! even if it is one of the oldest compilers, it seems Ascii C 1.2 is the best one.
BTW for the sake of uniformity, Microtech, could you compile and run the test as it is ?
(I mean the second release, the one that counts the interrupts elapsed, without improvements, only
forcing the compiler to compile for max speed)
| | PingPong msx master Mensajes: 1069 | Publicado: Marzo 02 2007, 15:06   | Someone could made a simple top ten please?
| | MicroTech msx lover Mensajes: 123 | Publicado: Marzo 02 2007, 15:57   | The second release WITHOUT forcing compiler for max speed gives:
tR 144 ints
2+ 1000 ints
I don't remember how to force compilation in "max speed mode" and I don't know if this will give real performance gain.
I remember something like
#pragma OPTIMIZE SPEED or similar but I can't find documentation about it, I'll search at home... (you should wait till Monday)
| | PingPong msx master Mensajes: 1069 | Publicado: Marzo 02 2007, 16:03   | Another test. With this source:
/* Code-generation test: store through a struct pointer inside a counted loop. */
struct mystr{
int a;
char b;
char c;
char j;
char unaligned[3];
}s;
/*
 * Writes k into px->j (truncated to char) and px->a for k = 0..99, advancing
 * px by one struct each time.
 * NOTE(review): s is a single object, so after the first iteration px points
 * past it and the remaining stores land outside s.
 */
void main(void)
{
struct mystr *px;
int k = 0;
px = &s;
for (k = 0; k<100; k++)
{
px->j = (char)k;
px->a = k;
px++;
}
}
Hitech C performs better than SoftTools WinIDE. Could anyone try this with ASCII C? Thanks.
Generate this on SoftTools WinIDE:
; SoftTools WinIDE output for the struct-pointer test. The numbered ";NN"
; comments are the compiler's echo of the C source lines.
; Locals relative to IX: (ix-4),(ix-3) = k; (ix-2),(ix-1) = px.
; __fent## and __icmp## are external runtime routines not shown here.
; NOTE(review): presumably __fent sets up the IX frame using HL = -4 as the
; size of the locals, and __icmp compares HL with DE -- confirm.
;13 void main(void)
;14 {
_main::
.debug G _main 32 1
.debug { _main 32 13 @3 @2
ld hl,-4
call __fent##
;15 struct mystr *px;
.debug L px -2 33
;16 int k = 0;
.debug L k -4 6
.debug # 16
ld (ix-4),low 0
ld (ix-3),high 0
.cseg
;17 px = &s;
.debug # 17
ld hl,_s
ld (ix-2),l
ld (ix-1),h
;18 for (k = 0; k<100; k++)
@4:
.debug < @4 @5
.debug # 18
ld (ix-4),low 0
ld (ix-3),high 0
@6:
; Loop test: HL = k, DE = 100; leave the loop when carry is clear.
ld l,(ix-4)
ld h,(ix-3)
ld de,0x64
call __icmp##
jr nc,@7
;19 {
;20 px->j = (char)k;
@9:
.debug < @9 @10
.debug # 20
; HL = px + 4 (offset of member j); A = low byte of k.
ld l,(ix-2)
ld h,(ix-1)
ld de,0x4
add hl,de
ld a,(ix-4)
ld (hl),a
;21 px->a = k;
.debug # 21
; A still holds the low byte of k from the store above.
ld l,(ix-2)
ld h,(ix-1)
ld e,a
ld d,(ix-3)
ld (hl),e
inc hl
ld (hl),d
;22 px++;
.debug # 22
; px += 8, the size of one struct.
ld l,(ix-2)
ld h,(ix-1)
ld de,0x8
add hl,de
ld (ix-2),l
ld (ix-1),h
;23 }
@10:
.debug >
;24 }
; k++ ; A is still the low byte of k, nothing since its load has changed it.
ld l,a
ld h,(ix-3)
inc hl
ld (ix-4),l
ld (ix-3),h
jr @6
@7:
@5:
.debug >
@2:
.debug } @2 24
@1 equ 4
; Release the frame and return.
ld sp,ix
pop ix
ret
@3 equ 6
.bseg
; Storage for the single struct s (8 bytes).
_s::
.ds 8
.debug G _s 34 0
.debug T 0B000220000100010001000000
.debug T 070000210002002200
.debug T 390003220008000500066D79737472000600000002610003000200
.debug T 0262000300030002630003000400026A00230005000A756E61
.debug T 6C69676E656400
.debug T 0900012300030003000300
.end
Generate this on SoftTools Hitech C 7.5
; HI-TECH C 7.5 output for the struct-pointer test.
; Register use: IY = px, BC = k. wrelop is an external runtime routine not
; shown here. NOTE(review): presumably it compares HL with DE and the sign
; flag then reflects k < 100 -- confirm against the HI-TECH runtime.
global small_model
psect text,class=CODE
global _main
signat _main,24
global _s
global wrelop
_main:
push iy
; px = &s, k = 0
ld hl,0
ld iy,_s
ld c,l
ld b,h
l5:
; px->j = low byte of k (member at offset 4)
ld a,c
ld (iy+4),a
; px->a = k (member at offset 0, low byte first)
ld (iy+0),c
ld (iy+1),b
; px += 8
ld de,08h
add iy,de
; k++, then test with HL = k, DE = 100; loop again while the result is "m".
inc bc
ld de,064h
ld l,c
ld h,b
call wrelop
jp m,l5
pop iy
ret
psect bss,class=DATA
; Storage for the single struct s (8 bytes).
_s:
defs 8
psect text
end
| | ARTRAG msx master Mensajes: 1802 | Publicado: Marzo 02 2007, 23:39   | this comes from IAR 4.06A
; IAR 4.06A output for the struct-pointer test.
; Register use: IX = px, DE = k.
; The loop test is done inline: 32868 is 0x8064, i.e. 100 with bit 15 flipped.
; k's bit 15 is flipped the same way (XOR with B = 0x80), so the 16-bit
; subtract that follows yields carry exactly when k < 100 as signed values.
NAME main(16)
RSEG CODE(0)
RSEG UDATA0(0)
PUBLIC main
PUBLIC s
EXTERN ?CLZ80L_4_06_L00
RSEG CODE
main:
PUSH BC
PUSH DE
PUSH IX
; px = &s, k = 0
LD IX,s
LD DE,0
?0001:
; HL = k with bit 15 flipped; XOR also clears carry for the SBC below.
LD BC,32868
LD L,E
LD H,D
LD A,B
XOR H
LD H,A
SBC HL,BC
; No borrow means k >= 100: leave the loop.
JR NC,?0000
?0002:
; px->j = low byte of k (member at offset 4)
LD (IX+4),E
; px->a = k, stored through HL = IX
PUSH IX
POP HL
LD (HL),E
INC HL
LD (HL),D
; k++, px += 8
LD BC,8
INC DE
ADD IX,BC
JR ?0001
?0000:
POP IX
POP DE
POP BC
RET
RSEG UDATA0
; Storage for the single struct s (8 bytes).
s:
DEFS 8
END
| | ARTRAG msx master Mensajes: 1802 | Publicado: Marzo 02 2007, 23:54   | This is sieve from IAR 4.06A
It takes only 943 ints @60Hz !!!!
I converted the ASM in order to assemble and link it with Hitech C, as IAR has no support for CP/M and I do not know (yet) how to make MSX ROMs
; Sieve benchmark as emitted by IAR 4.06A, converted by the poster to assemble
; and link with HI-TECH C. Address 64670 (0xFC9E; the C source earlier in this
; thread names it JIFFY) is zeroed at the start and printed at the end.
;
; Two register banks are used, switched with EXX:
;   bank entered by the EXX after the first printf: DE = iter, BC = count
;   other bank: DE = i, IY = prime, IX = k, BC/HL scratch
;
; Signed "limit - x" tests use this sequence:
;   OR 128      clears carry and sets bit 7 of A
;   SBC HL,rr   HL = limit - x
;   JP PO,skip  no overflow: the sign flag is already correct
;   XOR H       overflow: sign flag becomes the inverse of the result's bit 15
;   skip: JP M  taken when limit < x as signed values
psect text
global _main,_printf
_main:
; Save the registers of both banks that are used below.
PUSH BC
PUSH DE
PUSH IY
PUSH IX
EXX
PUSH BC
PUSH DE
EXX
; Store 0 at address 64670.
LD HL,0
LD (64670),HL
; printf(_0010); POP AF discards the argument.
LD HL,_0010
PUSH HL
CALL _printf
POP AF
; Switch banks; iter = 1.
EXX
LD DE,1
_0040:
_0012:
; Copy iter into the other bank's BC and test 10 - iter there.
PUSH DE
EXX
POP BC
LD HL,10
OR 128
SBC HL,BC
JP PO,_0036
XOR H
_0036:
; EXX leaves the flags untouched; leave the outer loop when 10 < iter.
EXX
JP M,_0011
_0013:
; count = 0, then switch to the working bank with i = 0.
LD BC,0
EXX
LD DE,0
_0016:
; Fill loop: while i <= 8190, flags[i] = 1.
LD HL,8190
OR 128
SBC HL,DE
JP PO,_0037
XOR H
_0037:
JP M,_0015
_0017:
LD HL,flags
ADD HL,DE
LD (HL),1
INC DE
JR _0016
_0015:
; i = 0 for the scan loop.
LD DE,0
_0020:
LD HL,8190
OR 128
SBC HL,DE
JP PO,_0038
XOR H
_0038:
JP M,_0019
_0021:
; Skip this index when flags[i] is zero.
LD HL,flags
ADD HL,DE
LD A,(HL)
OR A
JR Z,_0024
_0023:
; prime = (i + 1) * 2 + 1, kept in IY; k = prime + i, kept in IX.
LD L,E
LD H,D
INC HL
ADD HL,HL
INC HL
PUSH HL
POP IY
ADD HL,DE
PUSH HL
POP IX
_0026:
; Inner loop: while k <= 8190, flags[k] = 0 and k += prime.
PUSH IX
POP BC
LD HL,8190
OR 128
SBC HL,BC
JP PO,_0039
XOR H
_0039:
JP M,_0025
_0027:
LD HL,flags
ADD HL,BC
LD (HL),0
PUSH IY
POP BC
ADD IX,BC
JR _0026
_0025:
; count++ in the other bank.
EXX
INC BC
EXX
_0024:
; i++
INC DE
JR _0020
_0019:
; End of pass: back to the iter/count bank, iter++.
EXX
INC DE
JR _0040
_0011:
; printf(_0029, count): count is pushed from the iter/count bank.
PUSH BC
EXX
LD HL,_0029
PUSH HL
CALL _printf
POP AF
POP AF
; printf(_0030, word read from address 64670)
LD HL,64670
LD C,(HL)
INC HL
LD B,(HL)
PUSH BC
LD HL,_0030
PUSH HL
CALL _printf
POP AF
POP AF
; Restore the saved registers in reverse order of the prologue.
EXX
POP DE
POP BC
EXX
POP IX
POP IY
POP DE
POP BC
RET
psect data
_0010:
DEFB '10 iterations'
DEFB 10,0
_0029:
DEFB '%d primes.'
DEFB 10,0
_0030:
DEFB '%d ints.'
DEFB 10,0
psect bss
; Sieve flag array, 8191 bytes (indices 0..8190).
flags:
DEFS 8191
END
| | ARTRAG msx master Mensajes: 1802 | Publicado: Marzo 02 2007, 23:59   | One of the interesting thing of IAR is that it seems
to support undocumented z80 instructions...
But it is not clear to me how to get directly an output
that can run on MSX
| | manuel msx guru Mensajes: 3635 | Publicado: Marzo 04 2007, 13:34   | This is what z88dk does with the original program:
;* * * * * Small-C/Plus z88dk * * * * *
; Version: v26-03-2004.01
;
; Reconstructed for the z80 Module Assembler
;
; Module compile time: Sun Mar 4 13:33:10 2007
MODULE sieve.c
INCLUDE "#z80_crt0.hdr"
; main() of the sieve benchmark as compiled by z88dk (Small-C/Plus).
; The five "push bc" below reserve five words on the stack. With no
; temporaries pushed, the code addresses them as:
;   sp+0 iter, sp+2 count, sp+4 prime, sp+6 k, sp+8 i
; (offsets in the code are larger by 2 for every word pushed in between).
; l_le, l_pint, l_gint, l_gchar, l_sxt and printf are external library
; routines not shown here.
; NOTE(review): presumably l_gint loads HL from (HL), l_pint stores HL at (DE),
; l_gchar loads a char from (HL) and l_le sets carry when DE <= HL -- confirm
; against the z88dk library.
._main
push bc
push bc
push bc
push bc
push bc
; printf(i_1) with A = 1, then drop the argument.
ld hl,i_1+0
push hl
ld a,1
call printf
pop bc
; iter = 1 (replace the word on top of the stack).
ld hl,1 ;const
pop bc
push hl
jp i_5
.i_3
; iter++
pop hl
inc hl
push hl
dec hl
.i_5
; Outer test: DE = iter, HL = 10; leave the loop when carry is clear.
pop hl
push hl
ld de,10 ;const
ex de,hl
call l_le
jp nc,i_4
; count = 0 (second word from the top), keeping iter on top.
ld hl,0 ;const
pop de
pop bc
push hl
push de
; i = 0
ld hl,8 ;const
add hl,sp
ld de,0 ;const
ex de,hl
call l_pint
jp i_8
.i_6
; i++
ld hl,8 ;const
add hl,sp
push hl
call l_gint ;
inc hl
pop de
call l_pint
dec hl
.i_8
; Fill test: DE = i, HL = 8190; leave the loop when carry is clear.
ld hl,8 ;const
add hl,sp
ld e,(hl)
inc hl
ld d,(hl)
ld hl,8190 ;const
call l_le
jp nc,i_7
; flags[i] = 1
ld de,_flags
ld hl,10-2 ;const
add hl,sp
call l_gint ;
add hl,de
push hl
ld hl,1 ;const
ld a,l
call l_sxt
pop de
ld a,l
ld (de),a
jp i_6
.i_7
; i = 0 for the scan loop.
ld hl,8 ;const
add hl,sp
ld de,0 ;const
ex de,hl
call l_pint
jp i_11
.i_9
; i++
ld hl,8 ;const
add hl,sp
push hl
call l_gint ;
inc hl
pop de
call l_pint
dec hl
.i_11
; Scan test: DE = i, HL = 8190; leave the loop when carry is clear.
ld hl,8 ;const
add hl,sp
ld e,(hl)
inc hl
ld d,(hl)
ld hl,8190 ;const
call l_le
jp nc,i_10
; Skip this index when flags[i] is zero.
ld de,_flags
ld hl,10-2 ;const
add hl,sp
call l_gint ;
add hl,de
call l_gchar
ld a,h
or l
jp z,i_12
; prime = i + i + 3 (address of prime is pushed first, so i is at sp+10/sp+12)
ld hl,4 ;const
add hl,sp
push hl
ld hl,10 ;const
add hl,sp
ld e,(hl)
inc hl
ld d,(hl)
push de
ld hl,12 ;const
add hl,sp
call l_gint ;
pop de
add hl,de
inc hl
inc hl
inc hl
pop de
call l_pint
; k = i + prime
ld hl,6 ;const
add hl,sp
push hl
ld hl,10 ;const
add hl,sp
ld e,(hl)
inc hl
ld d,(hl)
push de
ld hl,8 ;const
add hl,sp
call l_gint ;
pop de
add hl,de
pop de
call l_pint
jp i_15
.i_13
; k += prime
ld hl,6 ;const
add hl,sp
push hl
ld e,(hl)
inc hl
ld d,(hl)
push de
ld hl,8 ;const
add hl,sp
call l_gint ;
pop de
add hl,de
pop de
call l_pint
.i_15
; Inner test: DE = k, HL = 8190; leave the loop when carry is clear.
ld hl,6 ;const
add hl,sp
ld e,(hl)
inc hl
ld d,(hl)
ld hl,8190 ;const
call l_le
jp nc,i_14
; flags[k] = 0
ld de,_flags
ld hl,8-2 ;const
add hl,sp
call l_gint ;
add hl,de
push hl
ld hl,0 ;const
ld a,l
call l_sxt
pop de
ld a,l
ld (de),a
jp i_13
.i_14
; count++ (second word from the top), keeping iter on top.
pop de
pop hl
inc hl
push hl
push de
dec hl
.i_12
jp i_9
.i_10
jp i_3
.i_4
; printf(i_1+15, count) with A = 2; i_1+15 is the second string below.
ld hl,i_1+15
push hl
ld hl,4 ;const
add hl,sp
call l_gint ;
push hl
ld a,2
call printf
; Drop the two printf arguments and the five local words.
pop bc
pop bc
pop bc
pop bc
pop bc
pop bc
pop bc
ret
.i_1
defm "10 iterations"&13&""&0
defm "%d primes."&13&""&0
; --- Start of Static Variables ---
._flags defs 8191
; --- Start of Scope Defns ---
LIB vfprintf_mini
LIB feof
LIB getk
LIB gets
LIB vfprintf_fp
XDEF _main
LIB sprintf
LIB read
LIB open
LIB fgetc
LIB tell
LIB freopen
XDEF _flags
LIB fgets
LIB open_z88
LIB fopen_z88
LIB creat
LIB close
LIB fread
LIB puts
LIB fwrite
LIB getwd
LIB fseek
LIB ltoa_any
LIB rename
LIB fopen
LIB fchkstd
LIB ftell
LIB readbyte
LIB fprintf
LIB vsscanf
XREF __sgoioblk
LIB fgetc_cons
LIB fgets_cons
LIB mkdir
LIB fputc
LIB remove
LIB scanf
LIB fputs
LIB lseek
LIB vsprintf
LIB vfscanf
LIB nropen
LIB writebyte
LIB fclose
LIB rmdir
LIB fgetpos
LIB printf
LIB sscanf
LIB fdopen
LIB printk
LIB printn
LIB fdtell
LIB closeall
LIB freopen_z88
LIB ungetc
LIB getarg
LIB getcwd
LIB vfprintf_comp
LIB fscanf
LIB fabandon
LIB fdgetpos
XREF _vfprintf
LIB write
LIB fputc_cons
LIB puts_cons
; --- End of Scope Defns ---
; --- End of Compilation ---
| | PingPong msx master Mensajes: 1069 | Publicado: Marzo 04 2007, 13:58   | Quote:
| This is what z88dk does with the original program:
|
This does not appear to be the most speed-optimized code possible. Did you enable the speed optimizations?
| | manuel msx guru Mensajes: 3635 | Publicado: Marzo 04 2007, 14:00   | How do I get an executable with SDCC?
By the way, I can't get MicroTech's version compiled with z88dk...  (So I can't time it.)
For z88dk I didn't use any special flags. | | PingPong msx master Mensajes: 1069 | Publicado: Marzo 04 2007, 15:20   | Quote:
| How do I get an executable with SDCC?
By the way, I can't get MicroTech's version compiled with z88dk...  (So I can't time it.)
For z88dk I didn't use any special flags.
|
Do you mean a .COM runnable on msxdos?
Go here:
http://msx.gabiot.com/index_en.html | | MicroTech msx lover Mensajes: 123 | Publicado: Marzo 05 2007, 13:07   | I found on Wios manual the following (Ascii) C directives:
#pragma optimize time
#pragma regalo
#pragma nonrec
Which should mean (in order):
- privilege execution speed instead of code size
- allocate variables in registers whenever possible
- compile functions in non recursive mode so use registers to pass parameters (when possible)
I've added these directives at the top of source:
tR 144 ints: same as without optimizations
2+ 1009 ints: worse
So I removed the directives to return to the previous version:
tR 144 ints: Ok
2+ 1009 ints: in a previous reply I found 1000 ints instead
Anyway, the Ascii compiler seems to produce already optimized code.
| |
| |
| |