this post was submitted on 29 Dec 2023
22 points (89.3% liked)

General Programming Discussion

7814 readers
10 users here now

A general programming discussion community.

Rules:

  1. Be civil.
  2. Please start discussions that spark conversation

Other communities

Systems

Functional Programming

Also related

founded 5 years ago
MODERATORS
 

If you use a compiled language, you should periodically look at Godbolt and see what your code is doing and what changes to your code will do in the compiled output.

In this case a positively insane way of calculating squares and cubes generates 311 lines of ARM assembler output that will swallow your memory. With even something as simple as -O1 on the command line it's replaced by one or two multiplications respectively. With -fwhole-program it removes the functions entirely and interlaces them into the loop in main().

Know your tools. It makes huge differences!

you are viewing a single comment's thread
view the rest of the comments
[–] [email protected] 0 points 10 months ago (1 children)

Multiplication hurts? 😲

@ cube -- multiplies the value passed in r0 by itself twice and returns the
@ low 32 bits of the result in r0. r3 is used as scratch and is not preserved.
cube:
        mul     r3, r0, r0              @ r3 = x * x
        mul     r0, r3, r0              @ r0 = (x * x) * x
        bx      lr                      @ return to the address in lr
[–] [email protected] 0 points 10 months ago

I mean it could hurt:

@ cube -- returns arg * arg * arg (low 32 bits) in r0, where arg is the value
@ passed in r0.
@
@ The value that is actually returned comes only from the two `mul`
@ instructions near the end. Everything before that is bookkeeping:
@   * a 112-byte frame addressed through r7 holds spilled copies of arg,
@   * arg - 1 is stored three times ([r7+108], [r7+104], [r7+100]),
@   * several 64-bit products of zero-extended copies of arg are built and
@     shifted left by 3, but the shifted results are overwritten before
@     anything reads them,
@   * sp is lowered by arg^3 bytes (rounded up to a multiple of 8) and then
@     restored from ip.
@ There are no branches or conditional instructions, so the flags written by
@ the `subs`/`movs`/`lsls`/`adds`/`lsrs` forms are never consumed here.
@
@ NOTE(review): the arg-1 stores, 64-bit size products, "<< 3" and dynamic
@ stack adjustment look like unoptimized compiler bookkeeping for a 3-D
@ variable-length array of arg*arg*arg one-byte elements -- confirm against
@ the original C source, which is not shown here.
cube:
        @ Prologue: save r4-r10 and fp, reserve 112 bytes, and use r7 as the
        @ base register for all frame accesses below.
        push    {r4, r5, r6, r7, r8, r9, r10, fp}
        sub     sp, sp, #112
        add     r7, sp, #0
        @ Spill the incoming argument to [r7+92].
        str     r0, [r7, #92]
        @ Keep the current sp in ip; sp is restored from ip after the dynamic
        @ stack adjustment further down.
        mov     r3, sp
        mov     ip, r3
        @ Reload the argument three times: r1, r0 and r6 all hold arg from
        @ here to the end of the function.
        ldr     r1, [r7, #92]
        ldr     r0, [r7, #92]
        ldr     r6, [r7, #92]
        @ [r7+108] = arg - 1 (not read again in this listing).
        subs    r3, r1, #1
        str     r3, [r7, #108]
        @ r5:r4 = arg zero-extended to 64 bits, then r3:r2 = r5:r4 << 3.
        @ r3 and r2 are overwritten right after, so the shift result is dead.
        mov     r2, r1
        movs    r3, #0
        mov     r4, r2
        mov     r5, r3
        mov     r2, #0
        mov     r3, #0
        lsls    r3, r5, #3
        orr     r3, r3, r4, lsr #29
        lsls    r2, r4, #3
        @ [r7+104] = arg - 1 (second copy, also not read again).
        subs    r3, r0, #1
        str     r3, [r7, #104]
        @ Store two zero-extended 64-bit copies of arg at [r7+80..87] and
        @ [r7+64..71].
        mov     r2, r1
        movs    r3, #0
        str     r2, [r7, #80]
        str     r3, [r7, #84]
        mov     r2, r0
        movs    r3, #0
        str     r2, [r7, #64]
        str     r3, [r7, #68]
        @ 64x64 -> 64-bit multiply of those two values, built from 32-bit
        @ pieces: cross terms lo*hi + hi*lo go into r3, umull gives the full
        @ lo*lo product in r9:r8, and the cross terms are added to the high
        @ word. Result: r9:r8.
        ldrd    r4, [r7, #80]
        mov     r3, r5
        ldr     r2, [r7, #64]
        mul     r2, r2, r3
        ldr     r3, [r7, #68]
        strd    r4, [r7, #80]
        ldr     r4, [r7, #80]
        mul     r3, r4, r3
        add     r3, r3, r2
        ldr     r2, [r7, #80]
        ldr     r4, [r7, #64]
        umull   r8, r9, r2, r4
        add     r3, r3, r9
        mov     r9, r3
        @ r3:r2 = r9:r8 << 3; again overwritten before being read.
        mov     r2, #0
        mov     r3, #0
        lsl     r3, r9, #3
        orr     r3, r3, r8, lsr #29
        lsl     r2, r8, #3
        @ [r7+100] = arg - 1 (third copy, not read again).
        subs    r3, r6, #1
        str     r3, [r7, #100]
        @ Same 64-bit product again, this time from copies stored at
        @ [r7+32..39] and [r7+72..79]. Result: fp:r10 = arg * arg (64-bit).
        mov     r2, r1
        movs    r3, #0
        str     r2, [r7, #32]
        str     r3, [r7, #36]
        mov     r2, r0
        movs    r3, #0
        str     r2, [r7, #72]
        str     r3, [r7, #76]
        ldrd    r4, [r7, #32]
        mov     r3, r5
        ldrd    r8, [r7, #72]
        mov     r2, r8
        mul     r2, r2, r3
        strd    r8, [r7, #72]
        ldr     r3, [r7, #76]
        mov     r8, r4
        mov     r9, r5
        mov     r4, r8
        mul     r3, r4, r3
        add     r3, r3, r2
        mov     r2, r8
        ldr     r4, [r7, #72]
        umull   r10, fp, r2, r4
        add     r3, r3, fp
        mov     fp, r3
        @ Multiply fp:r10 by a third zero-extended copy of arg (stored at
        @ [r7+24..31]); the 64-bit result is written to [r7+56..63].
        mov     r2, r6
        movs    r3, #0
        str     r2, [r7, #24]
        str     r3, [r7, #28]
        ldrd    r4, [r7, #24]
        mov     r3, r4
        mul     r2, r3, fp
        mov     r3, r5
        mul     r3, r10, r3
        add     r3, r3, r2
        mov     r2, r4
        umull   r4, r2, r10, r2
        str     r2, [r7, #60]
        mov     r2, r4
        str     r2, [r7, #56]
        ldr     r2, [r7, #60]
        add     r3, r3, r2
        str     r3, [r7, #60]
        @ Reload that 64-bit value into r9:r8 and compute r3:r2 = value << 3;
        @ r2 and r3 are overwritten immediately afterwards.
        mov     r2, #0
        mov     r3, #0
        ldrd    r8, [r7, #56]
        mov     r4, r9
        lsls    r3, r4, #3
        mov     r4, r8
        orr     r3, r3, r4, lsr #29
        mov     r4, r8
        lsls    r2, r4, #3
        @ Third repetition of the arg * arg 64-bit product, using copies at
        @ [r7+16..23] and [r7+8..15]; result is written to [r7+48..55].
        mov     r2, r1
        movs    r3, #0
        str     r2, [r7, #16]
        str     r3, [r7, #20]
        mov     r2, r0
        movs    r3, #0
        str     r2, [r7, #8]
        str     r3, [r7, #12]
        ldrd    r8, [r7, #16]
        mov     r3, r9
        ldrd    r10, [r7, #8]
        mov     r2, r10
        mul     r2, r2, r3
        mov     r3, fp
        mov     r4, r8
        mul     r3, r4, r3
        add     r3, r3, r2
        mov     r2, r8
        mov     r4, r10
        umull   r4, r2, r2, r4
        str     r2, [r7, #52]
        mov     r2, r4
        str     r2, [r7, #48]
        ldr     r2, [r7, #52]
        add     r3, r3, r2
        str     r3, [r7, #52]
        @ Multiply the value at [r7+48..55] by another zero-extended copy of
        @ arg (stored at [r7+0..7]); result is written to [r7+40..47].
        mov     r2, r6
        movs    r3, #0
        str     r2, [r7]
        str     r3, [r7, #4]
        ldrd    r8, [r7, #48]
        mov     r3, r9
        ldrd    r10, [r7]
        mov     r2, r10
        mul     r2, r2, r3
        mov     r3, fp
        mov     r4, r8
        mul     r3, r4, r3
        add     r3, r3, r2
        mov     r2, r8
        mov     r4, r10
        umull   r4, r2, r2, r4
        str     r2, [r7, #44]
        mov     r2, r4
        str     r2, [r7, #40]
        ldr     r2, [r7, #44]
        add     r3, r3, r2
        str     r3, [r7, #44]
        @ r3:r2 = (value at [r7+40..47]) << 3; overwritten right after.
        mov     r2, #0
        mov     r3, #0
        ldrd    r8, [r7, #40]
        mov     r4, r9
        lsls    r3, r4, #3
        mov     r4, r8
        orr     r3, r3, r4, lsr #29
        mov     r4, r8
        lsls    r2, r4, #3
        @ r3 = arg * arg * arg (32-bit), rounded up to a multiple of 8, then
        @ subtracted from sp: this is the stack space that grows with arg^3.
        mov     r3, r1
        mov     r2, r0
        mul     r3, r2, r3
        mov     r2, r6
        mul     r3, r2, r3
        adds    r3, r3, #7
        lsrs    r3, r3, #3
        lsls    r3, r3, #3
        sub     sp, sp, r3
        @ Record the new sp (start of the reserved region) at [r7+96]; it is
        @ not read again in this listing.
        mov     r3, sp
        str     r3, [r7, #96]
        @ The actual return value: r3 = arg * arg * arg (low 32 bits).
        mov     r3, r1
        mov     r2, r0
        mul     r3, r2, r3
        mov     r2, r6
        mul     r3, r2, r3
        @ Undo the dynamic stack adjustment by restoring the sp saved in ip.
        mov     sp, ip
        @ Move the result into r0.
        mov     r0, r3
        @ Epilogue: release the 112-byte frame via r7, restore the saved
        @ registers and return to the address in lr.
        adds    r7, r7, #112
        mov     sp, r7
        pop     {r4, r5, r6, r7, r8, r9, r10, fp}
        bx      lr