I do not understand those optimizations :
PatternBasedOptimizer substitution in threed/lib/neonlib/src/VDP.asm, line 21: 2 bytes saved cp 1 jr c, MSX1 jr z, MSX2 Replaced by: cp 1 + 1 jr c, MSX2
How does it work the same way ?
Where is the jump to MSX1 ?
PatternBasedOptimizer substitution in threed/src/Application.asm, line 236: 4 bytes saved ld ix, Application_points Replaced by: PatternBasedOptimizer substitution in threed/src/Application.asm, line 243: 4 bytes saved ld ix, Application_edges Replaced by:
Replaced by what ??
They seem to have been just erased ("4 bytes saved").
@Grauw: oh, good catch! it was missing indeed, added
And oh, that error is interesting, I thought I had that case covered. Let me investigate!
@Metalion: yeah, I need to improve the output haha; that means that it was removed, as it was unnecessary code (those values were never used)
@Metalion: oh!!!! good catch on the MSX1/MSX2 jump, I didn't realize it when I checked for output correctness! there must be a bug in my pattern matching code. Added to the to-do list, thanks for spotting it!
It would be cool if it could deal with SDCC code
;app.c:12: while(1) { 00102$: ;app.c:13: p->x += p->dx; push bc pop iy inc iy inc iy ld e, 0 (iy) ld d, 1 (iy) ld l, c ld h, b ld a, (hl) inc hl ld h, (hl) ld l, a add hl, de ld 0 (iy), l ld 1 (iy), h jr 00102$ ;app.c:15: } -- 199 cycles
Oh, good idea, I'll add it to my to-do list, is there any example SDCC-generated assembler codebase publicly available that I could use to test?
I find pointers and addresses to be the top problem in SDCC.
some examples
#include "string.h" #include "math.h" int main() { return 0; } struct obj { int dx; int x; int dy; int y; }; void test(struct obj **arr, int n) { struct obj *p = *arr++; while(1) { p->x += p->dx; } } struct node { struct node *next; int dx; int x; int dy; int y; int dz; int z; struct node *prev; }; typedef struct node node; void test1(node *p) { while(p) { p->x += p->dx; p->y += p->dy; p->z += p->dz; p = p->next; } } void test2(node *p0) { node *p = p0; int *q = 0; int w = 0; int w2 = 0; while(p) { q = &(p->dx); w = *q; q++; w2 = *q; w2 += w; *q = w2; q++; w = *q; q++; w2 = *q; w2 += w; *q = w2; q++; w = *q; q++; w2 = *q; w2 += w; *q = w2; p = p->next; } }
;-------------------------------------------------------- ; File Created by SDCC : free open source ANSI-C Compiler ; Version 3.8.0 #10557 (MINGW64) ;-------------------------------------------------------- .module app .optsdcc -mz80 ;-------------------------------------------------------- ; Public variables in this module ;-------------------------------------------------------- .globl _test2 .globl _test1 .globl _test .globl _main ;-------------------------------------------------------- ; special function registers ;-------------------------------------------------------- ;-------------------------------------------------------- ; ram data ;-------------------------------------------------------- .area _DATA ;-------------------------------------------------------- ; ram data ;-------------------------------------------------------- .area _INITIALIZED ;-------------------------------------------------------- ; absolute external ram data ;-------------------------------------------------------- .area _DABS (ABS) ;-------------------------------------------------------- ; global & static initialisations ;-------------------------------------------------------- .area _HOME .area _GSINIT .area _GSFINAL .area _GSINIT ;-------------------------------------------------------- ; Home ;-------------------------------------------------------- .area _HOME .area _HOME ;-------------------------------------------------------- ; code ;-------------------------------------------------------- .area _CODE ;app.c:5: int main() { ; --------------------------------- ; Function main ; --------------------------------- _main:: ;app.c:6: return 0; ld hl, #0x0000 ;app.c:7: } ret ;app.c:10: void test(struct obj **arr, int n) { ; --------------------------------- ; Function test ; --------------------------------- _test:: push ix ld ix,#0 add ix,sp ;app.c:11: struct obj *p = *arr++; ld l, 4 (ix) ld h, 5 (ix) ld c, (hl) inc hl ld b, (hl) ;app.c:12: while(1) { 00102$: ;app.c:13: p->x += 
p->dx; push bc pop iy inc iy inc iy ld e, 0 (iy) ld d, 1 (iy) ld l, c ld h, b ld a, (hl) inc hl ld h, (hl) ld l, a add hl, de ld 0 (iy), l ld 1 (iy), h jr 00102$ ;app.c:15: } pop ix ret ;app.c:26: void test1(node *p) { ; --------------------------------- ; Function test1 ; --------------------------------- _test1:: push ix ld ix,#0 add ix,sp push af ;app.c:27: while(p) { 00101$: ld a, 5 (ix) or a, 4 (ix) jr Z,00104$ ;app.c:28: p->x += p->dx; ld c, 4 (ix) ld b, 5 (ix) ld hl, #0x0004 add hl, bc ex (sp), hl pop hl push hl ld e, (hl) inc hl ld d, (hl) ld l, c ld h, b inc hl inc hl ld a, (hl) inc hl ld h, (hl) ld l, a add hl, de ex de, hl pop hl push hl ld (hl), e inc hl ld (hl), d ;app.c:29: p->y += p->dy; ld hl, #0x0008 add hl, bc ex (sp), hl pop hl push hl ld e, (hl) inc hl ld d, (hl) push bc pop iy ld l, 6 (iy) ld h, 7 (iy) add hl, de ex de, hl pop hl push hl ld (hl), e inc hl ld (hl), d ;app.c:30: p->z += p->dz; ld hl, #0x000c add hl, bc ex (sp), hl pop hl push hl ld e, (hl) inc hl ld d, (hl) push bc pop iy ld l, 10 (iy) ld h, 11 (iy) add hl, de ex de, hl pop hl push hl ld (hl), e inc hl ld (hl), d ;app.c:31: p = p->next; ld a, (bc) ld 4 (ix), a inc bc ld a, (bc) ld 5 (ix), a jr 00101$ 00104$: ;app.c:33: } ld sp, ix pop ix ret ;app.c:35: void test2(node *p0) { ; --------------------------------- ; Function test2 ; --------------------------------- _test2:: push ix ld ix,#0 add ix,sp ;app.c:36: node *p = p0; ld c, 4 (ix) ld b, 5 (ix) ;app.c:38: while(p) { 00101$: ld a, b or a, c jr Z,00104$ ;app.c:39: q = &(p->dx); push bc pop iy inc iy inc iy ;app.c:40: w = *q; q++; w2 = *q; w2 += w; *q = w2; q++; ld e, 0 (iy) ld d, 1 (iy) inc iy inc iy ld l, 0 (iy) ld h, 1 (iy) add hl, de ld 0 (iy), l ld 1 (iy), h inc iy inc iy ;app.c:41: w = *q; q++; w2 = *q; w2 += w; *q = w2; q++; ld e, 0 (iy) ld d, 1 (iy) inc iy inc iy ld l, 0 (iy) ld h, 1 (iy) add hl, de ld 0 (iy), l ld 1 (iy), h inc iy inc iy ;app.c:42: w = *q; q++; w2 = *q; w2 += w; *q = w2; ld e, 0 (iy) ld d, 1 (iy) inc iy 
inc iy ld l, 0 (iy) ld h, 1 (iy) add hl, de ld 0 (iy), l ld 1 (iy), h ;app.c:43: p = p->next; ld l, c ld h, b ld c, (hl) inc hl ld b, (hl) jr 00101$ 00104$: ;app.c:45: } pop ix ret .area _CODE .area _INITIALIZER .area _CABS (ABS)
Thanks @hit9918, I'll save this file as an example and will add supporting SDCC output to my to do list!
Also, I just made a new release (MDL alpha v3): https://github.com/santiontanon/mdlz80optimizer/releases/tag...
The main update is fixing the issues you guys identified above:
- that incorrect optimization does not happen any more
- @grauw: I have tried it now on your "vgmplay-msx" project, and I got it to run there. It is a large project, so it reports saving about 40 bytes. I don't think I have the "section" keyword handled well though (so parsing the source ends in one of the "ERROR" statements in your source code). But at least it reads it now, resolves all the macros and gets to the point of running the optimizer! Might be worth comparing the symbol table generated by Glass with that generated by MDL to see if there are disagreements...
Also, TheNestruo brought a little bit of sanity to the code-base by refactoring a lot of the underlying Java infrastructure. So, a few things should be more robust now. Thanks a lot!
There is still an outstanding issue when optimization happens in code generated by a macro defined in a different file, as it is hard to report the filename/line number where the optimization happened. So, it might be hard to map the output of the optimizer to the original source code in projects that heavily rely on macros. I need to figure out what's the best way to report that...
Ah, I forgot, in the new release, there is a new "-a" option that writes an output file like this.
For example, with this call:
java -jar mdl.jar xspelunker/src/spelunk-main.asm -po -a annotations.asm
I get a file called annotations.txt
with this content:
xspelunker/src/spelunk-gfx.asm 45 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 569 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 926 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 1281 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 2034 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 2050 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 2072 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 2084 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 2103 warning Label defined without a colon. xspelunker/src/spelunk-player.asm 2115 warning Label defined without a colon. xspelunker/src/spelunk-player-bullets.asm 340 warning Label defined without a colon. xspelunker/src/spelunk-player-bullets.asm 346 warning Label defined without a colon. xspelunker/src/spelunk-player-bullets.asm 354 warning Label defined without a colon. xspelunker/src/spelunk-player-bullets.asm 368 warning Label defined without a colon. xspelunker/src/spelunk-player-bullets.asm 376 warning Label defined without a colon. xspelunker/src/spelunk-pcg.asm 1420 warning Label defined without a colon. xspelunker/src/spelunk-pcg.asm 2009 warning Use of confusing z80 'jp (reg)' syntax, rather than the more accurate 'jp reg'. xspelunker/src/spelunk-player-bullets.asm 561 optimization cp 0 -> or a xspelunker/src/spelunk-player-bullets.asm 165 optimization cp 1 -> dec a xspelunker/src/spelunk-gui.asm 84 optimization cp 1 -> dec a xspelunker/src/spelunk-pcg.asm 186 optimization cp 1 -> dec a xspelunker/src/spelunk-config.asm 42 optimization cp 1 -> dec a xspelunker/src/spelunk-player.asm 136 optimization unused ld reg,? xspelunker/src/spelunk-player.asm 751 optimization unused ld reg,? xspelunker/src/spelunk-pcg.asm 141 optimization unused ld reg,? 
xspelunker/src/spelunk-pcg.asm 1754 optimization ld a,n; ld (hl),a -> ld (hl),n xspelunker/src/spelunk-player.asm 604 optimization dec b; jr nz,label -> djnz label xspelunker/src/spelunk-pcg.asm 667 optimization dec b; jr nz,label -> djnz label xspelunker/src/spelunk-enemies.asm 743 optimization ld a,reg; neg -> xor a; sub reg xspelunker/src/spelunk-enemies.asm 750 optimization ld a,reg; neg -> xor a; sub reg
This is my first attempt at generating some output that could be parsed by a plugin in some editor like Sublime/VSCode, and show in-editor optimization annotations. So, each line is of the form "filename tab line-number tab tag tab message" to be easily parseable, one message per line. Of course, this is just a first attempt, and I am not sure if this is the right or most useful format. But again, it's a start.
Note: also, if you really like defining labels without colons in assembler and do not want those warnings, you can deactivate them with a flag, of course (and the warning about jp (hl) only shows up if you have selected a dialect that supports jp hl instead)
I was trying to test the optimizer with the sources I have here... but I ran into several problems. No cake for me
I'll open the proper issues as soon as possible, but here's a quick summary:
- My own code (tniASM 0.45) failed because it uses ZX7, and dzx7_standard.asm uses undocumented instruction "SLL E" aka "SLS E"
- "org $4000, $bfff" also caused the parser to fail (or maybe it was "ds $4010 - $, $00" or a similar construct; can't remember)
- My particular usage of relative include paths was a little bit troublesome (kinda fixed it but...)
- MetalGear disassembly failed because of STRUCT keyword
- Other asMSX projects I had in my HDD failed because "the macro .rom was not expanded"
- Previously, those asMSX projects failed because of the lack of "skip" support in ".incbin" (fixed) and the alternative syntax for several instructions, such as "EX HL,DE" (also fixed)
- I ran into a stack overflow problem in some classic game disassemblies, between SourceCode.getAddress and SourceCode.getAddressAfter (this seems to be fixed now)
I'll try to provide a more precise description of the problems (as well as source code examples) as issues (or even better, pull requests!)
It would be cool if it could deal with SDCC code
;app.c:12: while(1) { 00102$: ;app.c:13: p->x += p->dx; push bc pop iy inc iy inc iy ld e, 0 (iy) ld d, 1 (iy) ld l, c ld h, b ld a, (hl) inc hl ld h, (hl) ld l, a add hl, de ld 0 (iy), l ld 1 (iy), h jr 00102$ ;app.c:15: } -- 199 cycles
This could be replaced by
ld l,c ld h,b ld e,(hl) inc hl ld d,(hl) inc hl ld c,(hl) inc hl ld b,(hl) ex de,hl add hl,bc ex de,hl ld (hl),d dec hl ld (hl),e