This is the only way that we've managed to reliably get a single inc instruction loop: by using inline assembly. E.g. on x86 we do:
loop:
inc %[i];
cmp %[max], %[i];
jb loop;
To run for about 1 s on a P14s (Ubuntu 25.04, GCC 14.2, -O0, x86_64) we need about 5 billion iterations:
time ./inc_loop_asm.out 5000000000
c/inc_loop_asm.c
#include <stdlib.h>
#include <stdint.h>
/*
 * Run a minimal increment-and-compare loop written in inline assembly.
 *
 * argv[1] (optional): upper bound for the counter. It is parsed by strtoull
 * with base 0, so decimal, 0x-prefixed hexadecimal and 0-prefixed octal are
 * all accepted. The bound defaults to 1 when no argument is given.
 *
 * On x86 the loop increments first and compares afterwards, so the body runs
 * at least once: a bound of 0 still leaves the counter at 1.
 * On any other architecture no loop is compiled in and the counter stays 0.
 *
 * Returns the final counter value converted to int.
 */
int main(int argc, char **argv) {
    uint64_t max = 1;
    uint64_t i = 0;

    if (argc > 1) {
        /*
         * The bound is unsigned, so parse it as unsigned: strtoll would
         * saturate at LLONG_MAX and make the upper half of the uint64_t
         * range unreachable.
         */
        max = strtoull(argv[1], NULL, 0);
    }
#if defined(__x86_64__) || defined(__i386__)
    /*
     * "1:" / "1b" is a GNU as local label. A named label such as "start:"
     * would define a real symbol in the object file and break assembly if
     * the compiler ever emitted this asm statement more than once.
     *
     * cmp computes i - max and jb branches while i < max (unsigned).
     * inc and cmp both write the flags register, hence the "cc" clobber.
     *
     * NOTE(review): on i386 a 64-bit operand does not fit in one general
     * purpose register, so the "r" constraints below look suspect for that
     * target -- confirm i386 is really meant to be supported here.
     */
    __asm__ volatile (
        "1:"
        "inc %[i];"
        "cmp %[max], %[i];"
        "jb 1b;"
        : [i] "+r" (i)
        : [max] "r" (max)
        : "cc"
    );
#endif
    /* The process exit status carries the final counter value. */
    return (int)i;
}