core: factor out code for estimating the rendering time budget

Add a render_statistics type to encapsulate all the statistics done on
rendering times. And use that to estimate the time budget for rendering
and frame pacing.

Tweak the rolling window utilities a bit so we can reuse one rolling
window for multiple statistics.

Signed-off-by: Yuxuan Shui <yshuiv7@gmail.com>
This commit is contained in:
Yuxuan Shui 2023-06-10 14:08:49 +01:00
parent 336cb0917a
commit 8e1f3c92f5
No known key found for this signature in database
GPG Key ID: D3A4405BE6CC17F4
8 changed files with 443 additions and 164 deletions

View File

@ -176,6 +176,20 @@ void paint_all_new(session_t *ps, struct managed_win *t) {
region_t reg_shadow_clip;
pixman_region32_init(&reg_shadow_clip);
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
auto now_us = (uint64_t)(now.tv_sec * 1000000 + now.tv_nsec / 1000);
if (ps->next_render > 0) {
log_trace("Render schedule deviation: %ld us (%s) %ld %ld",
labs((int64_t)now_us - (int64_t)ps->next_render),
now_us < ps->next_render ? "early" : "late", now_us,
ps->next_render);
ps->last_schedule_delay = 0;
if (now_us > ps->next_render) {
ps->last_schedule_delay = now_us - ps->next_render;
}
}
if (ps->backend_data->ops->prepare) {
ps->backend_data->ops->prepare(ps->backend_data, &reg_paint);
}

View File

@ -58,6 +58,7 @@
#include "list.h"
#include "region.h"
#include "render.h"
#include "statistics.h"
#include "types.h"
#include "utils.h"
#include "win_defs.h"
@ -249,13 +250,16 @@ typedef struct session {
/// When the currently rendered frame will be displayed.
/// 0 means there is no pending frame.
uint64_t target_msc;
/// When did we render our last frame.
uint64_t last_render;
/// The delay between when the last frame was scheduled to be rendered, and when
/// the render actually started.
uint64_t last_schedule_delay;
/// When do we want our next frame to start rendering.
uint64_t next_render;
/// Whether we can perform frame pacing.
bool frame_pacing;
struct rolling_avg *frame_time;
struct rolling_max *render_stats;
/// Render statistics
struct render_statistics render_stats;
// === Operation related ===
/// Flags related to the root window

View File

@ -9,7 +9,7 @@ base_deps = [
srcs = [ files('picom.c', 'win.c', 'c2.c', 'x.c', 'config.c', 'vsync.c', 'utils.c',
'diagnostic.c', 'string_utils.c', 'render.c', 'kernel.c', 'log.c',
'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c') ]
'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c', 'statistics.c') ]
picom_inc = include_directories('.')
cflags = []

View File

@ -16,6 +16,7 @@
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
@ -43,7 +44,6 @@
#endif
#include "backend/backend.h"
#include "c2.h"
#include "config.h"
#include "diagnostic.h"
#include "log.h"
#include "region.h"
@ -60,6 +60,7 @@
#include "file_watch.h"
#include "list.h"
#include "options.h"
#include "statistics.h"
#include "uthash_extra.h"
/// Get session_t pointer from a pointer to a member of session_t
@ -177,8 +178,17 @@ static inline struct managed_win *find_win_all(session_t *ps, const xcb_window_t
/// 4. otherwise, we schedule a render for that target frame. we use past statistics about
/// how long our renders took to figure out when to start rendering. we start rendering
/// at the latest point of time possible to still hit the target frame.
void schedule_render(session_t *ps) {
///
/// The `triggered_by_vblank` parameter indicates whether this function is
/// triggered by a steady timer, i.e. we are rendering for each vblank. The other case is
/// when we stop rendering for a while because there are no changes on screen, then
/// something changed and schedule_render is triggered by a DamageNotify. The idea is that
/// when the schedule is triggered by a steady timer, schedule_render will be called at a
/// predictable offset into each vblank.
void schedule_render(session_t *ps, bool triggered_by_vblank) {
double delay_s = 0;
ps->next_render = 0;
if (!ps->frame_pacing || !ps->redirected) {
// Not doing frame pacing, schedule a render immediately, if not already
// scheduled.
@ -205,59 +215,77 @@ void schedule_render(session_t *ps) {
// We missed our target, push it back one frame
ps->target_msc = ps->last_msc + 1;
}
log_trace("Still rendering for target frame %ld, not scheduling another "
"render",
ps->target_msc);
return;
}
if (ps->target_msc > ps->last_msc) {
// Render for the target frame is completed, and has yet to be displayed.
// Render for the target frame is completed, but is yet to be displayed.
// Don't schedule another render.
log_trace("Target frame %ld is in the future, and we have already "
"rendered, last msc: %d",
ps->target_msc, (int)ps->last_msc);
return;
}
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
auto now_us = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_nsec / 1000;
if (triggered_by_vblank) {
log_trace("vblank schedule delay: %ld us", now_us - ps->last_msc_instant);
}
int render_time_us =
(int)(render_time.tv_sec * 1000000L + render_time.tv_nsec / 1000L);
if (ps->target_msc == ps->last_msc) {
// The frame has just been displayed, record its render time;
log_trace("Last render call took: %d us, rolling_max: %d us",
render_time_us, rolling_max_get_max(ps->render_stats));
rolling_max_push(ps->render_stats, render_time_us);
log_trace("Last render call took: %d (gpu) + %d (cpu) us, "
"last_msc: %" PRIu64,
render_time_us, (int)ps->last_schedule_delay, ps->last_msc);
render_statistics_add_render_time_sample(
&ps->render_stats, render_time_us + (int)ps->last_schedule_delay);
ps->target_msc = 0;
ps->last_schedule_delay = 0;
}
// TODO(yshui): why 1500?
const int SLACK = 1500;
int render_time_estimate = rolling_max_get_max(ps->render_stats);
auto frame_time = (uint64_t)rolling_avg_get_avg(ps->frame_time);
if (render_time_estimate < 0 || frame_time == 0) {
// We don't have render time, and/or frame time estimates, maybe there's
unsigned int divisor = 0;
auto render_budget = render_statistics_get_budget(&ps->render_stats, &divisor);
auto frame_time = render_statistics_get_vblank_time(&ps->render_stats);
if (frame_time == 0) {
// We don't have enough data for render time estimates, maybe there's
// no frame rendered yet, or the backend doesn't support render timing
// information, schedule render immediately.
ps->target_msc = ps->last_msc + 1;
goto schedule;
}
render_time_estimate += SLACK;
auto minimal_target_us = now_us + (uint64_t)render_time_estimate;
auto frame_delay = (uint64_t)ceil(
(double)(minimal_target_us - ps->last_msc_instant) / (double)frame_time);
auto deadline = ps->last_msc_instant + frame_delay * frame_time;
ps->target_msc = ps->last_msc + frame_delay;
auto delay = deadline - (uint64_t)render_time_estimate - now_us;
delay_s = (double)delay / 1000000.0;
if (delay_s > 1) {
log_warn("Delay too long: %f s, render_time: %d us, frame_time: %" PRIu64
" us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " us",
delay_s, render_time_estimate, frame_time, now_us, deadline);
const auto deadline = ps->last_msc_instant + (unsigned long)divisor * frame_time;
unsigned int available = 0;
if (deadline > now_us) {
available = (unsigned int)(deadline - now_us);
}
log_trace("Delay: %lu us, last_msc: %" PRIu64 ", render_time: %d, frame_time: "
"%" PRIu64 ", now_us: %" PRIu64 ", next_msc: %" PRIu64,
delay, ps->last_msc_instant, render_time_estimate, frame_time, now_us,
deadline);
ps->target_msc = ps->last_msc + divisor;
if (available > render_budget) {
delay_s = (double)(available - render_budget) / 1000000.0;
ps->next_render = deadline - render_budget;
} else {
delay_s = 0;
ps->next_render = now_us;
}
if (delay_s > 1) {
log_warn("Delay too long: %f s, render_budget: %d us, frame_time: "
"%" PRIu32 " us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " u"
"s",
delay_s, render_budget, frame_time, now_us, deadline);
}
log_trace("Delay: %.6lf s, last_msc: %" PRIu64 ", render_budget: %d, frame_time: "
"%" PRIu32 ", now_us: %" PRIu64 ", next_msc: %" PRIu64 ", "
"target_msc: %" PRIu64 ", divisor: %d",
delay_s, ps->last_msc_instant, render_budget, frame_time, now_us,
deadline, ps->target_msc, divisor);
schedule:
assert(!ev_is_active(&ps->draw_timer));
@ -282,7 +310,7 @@ void queue_redraw(session_t *ps) {
// If frame pacing is not enabled, pretend this is false.
// If --benchmark is used, redraw is always queued
if (!ps->redraw_needed && !ps->o.benchmark) {
schedule_render(ps);
schedule_render(ps, false);
}
ps->redraw_needed = true;
}
@ -758,7 +786,6 @@ static void configure_root(session_t *ps) {
}
force_repaint(ps);
}
return;
}
static void handle_root_flags(session_t *ps) {
@ -1439,10 +1466,9 @@ static bool redirect_start(session_t *ps) {
// states.
ps->last_msc_instant = 0;
ps->last_msc = 0;
ps->last_render = 0;
ps->last_schedule_delay = 0;
ps->target_msc = 0;
rolling_avg_reset(ps->frame_time);
rolling_max_reset(ps->render_stats);
render_statistics_reset(&ps->render_stats);
} else if (ps->frame_pacing) {
log_error("Present extension is not supported, frame pacing disabled.");
ps->frame_pacing = false;
@ -1543,11 +1569,12 @@ handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_
if (ps->last_msc_instant != 0) {
auto frame_count = cne->msc - ps->last_msc;
int frame_time = (int)((cne->ust - ps->last_msc_instant) / frame_count);
rolling_avg_push(ps->frame_time, frame_time);
log_trace("Frame count %lu, frame time: %d us, rolling average: %lf us, "
render_statistics_add_vblank_time_sample(&ps->render_stats, frame_time);
log_trace("Frame count %lu, frame time: %d us, rolling average: %u us, "
"msc: %" PRIu64 ", offset: %d us",
frame_count, frame_time, rolling_avg_get_avg(ps->frame_time),
cne->ust, (int)drift);
frame_count, frame_time,
render_statistics_get_vblank_time(&ps->render_stats), cne->ust,
(int)drift);
} else if (drift > 1000000 && ps->frame_pacing) {
// This is the first MSC event we receive, let's check if the timestamps
// align with the monotonic clock. If not, disable frame pacing because we
@ -1560,7 +1587,7 @@ handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_
ps->last_msc_instant = cne->ust;
ps->last_msc = cne->msc;
if (ps->redraw_needed) {
schedule_render(ps);
schedule_render(ps, true);
}
}
@ -1651,6 +1678,8 @@ static void tmout_unredir_callback(EV_P attr_unused, ev_timer *w, int revents at
}
static void fade_timer_callback(EV_P attr_unused, ev_timer *w, int revents attr_unused) {
// TODO(yshui): do we still need the fade timer? we queue redraw automatically in
// draw_callback_impl if animation is running.
session_t *ps = session_ptr(w, fade_timer);
queue_redraw(ps);
}
@ -1778,12 +1807,6 @@ static void draw_callback_impl(EV_P_ session_t *ps, int revents attr_unused) {
}
log_trace("Render end");
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
uint64_t now_us =
(uint64_t)now.tv_sec * 1000000ULL + (uint64_t)now.tv_nsec / 1000;
ps->last_render = now_us;
ps->first_frame = false;
paint++;
if (ps->o.benchmark && paint >= ps->o.benchmark) {
@ -2004,8 +2027,7 @@ static session_t *session_init(int argc, char **argv, Display *dpy,
pixman_region32_init(&ps->screen_reg);
// TODO(yshui) investigate what's the best window size
ps->render_stats = rolling_max_new(128);
ps->frame_time = rolling_avg_new(16);
render_statistics_init(&ps->render_stats, 128);
ps->pending_reply_tail = &ps->pending_reply_head;
@ -2642,8 +2664,7 @@ static void session_destroy(session_t *ps) {
free(ps->o.glx_fshader_win_str);
x_free_randr_info(ps);
rolling_avg_destroy(ps->frame_time);
rolling_max_destroy(ps->render_stats);
render_statistics_destroy(&ps->render_stats);
// Release custom window shaders
free(ps->o.window_shader_fg);

100
src/statistics.c Normal file
View File

@ -0,0 +1,100 @@
//! Rendering statistics
//!
//! Tracks how long it takes to render a frame, for measuring performance, and for pacing
//! the frames.
#include "statistics.h"
#include "log.h"
#include "utils.h"
/// Initialize a render_statistics object tracking a window of `window_size`
/// render time samples.
void render_statistics_init(struct render_statistics *rs, int window_size) {
	*rs = (struct render_statistics){0};
	rolling_window_init(&rs->render_times, window_size);
	// Track the 98th percentile of render times, allowing the tracked rank to
	// drift by 1% of the window before a re-estimate is forced.
	const double quantile = 0.98;
	const double tolerance = 0.01;
	rolling_quantile_init_with_tolerance(&rs->render_time_quantile, window_size,
	                                     quantile, tolerance);
}
/// Record one vblank interval sample (in microseconds).
///
/// Maintains a running mean/variance of the vblank time, with two safeguards:
///   1. a sample further than 3 standard deviations from the current estimate
///      is treated as an outlier (e.g. a refresh rate change) and resets the
///      statistics;
///   2. once roughly 10 seconds worth of samples (and more than 20) have been
///      collected, the estimate is considered stable and further samples are
///      ignored (until an outlier triggers a reset).
void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us) {
	// Standard deviation of the samples so far; 0 until we have >= 2 samples.
	auto sample_sd = sqrt(cumulative_mean_and_var_get_var(&rs->vblank_time_us));
	// 0 during warm-up (see render_statistics_get_vblank_time), which
	// disables the outlier check below until the estimate is meaningful.
	auto current_estimate = render_statistics_get_vblank_time(rs);
	if (current_estimate != 0 && fabs((double)time_us - current_estimate) > sample_sd * 3) {
		// Deviated from the mean by more than 3 sigma (p < 0.003)
		log_debug("vblank time outlier: %d %f %f", time_us, rs->vblank_time_us.mean,
		          cumulative_mean_and_var_get_var(&rs->vblank_time_us));
		// An outlier sample, this could mean things like refresh rate changes, so
		// we reset the statistics. This could also be benign, but we like to be
		// cautious.
		cumulative_mean_and_var_init(&rs->vblank_time_us);
	}
	if (rs->vblank_time_us.mean != 0) {
		// How many vblanks fit in 10 seconds at the current mean interval.
		auto nframes_in_10_seconds =
		    (unsigned int)(10. * 1000000. / rs->vblank_time_us.mean);
		if (rs->vblank_time_us.n > 20 && rs->vblank_time_us.n > nframes_in_10_seconds) {
			// We collected 10 seconds worth of samples, we assume the
			// estimated refresh rate is stable. We will still reset the
			// statistics if we get an outlier sample though, see above.
			return;
		}
	}
	cumulative_mean_and_var_update(&rs->vblank_time_us, time_us);
}
/// Record how long one render took (in microseconds).
void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us) {
	int evicted = 0;
	bool was_full = rolling_window_push_back(&rs->render_times, time_us, &evicted);
	if (was_full) {
		// The window dropped its oldest sample to make room; remove it
		// from the quantile tracker too so the two stay in sync.
		rolling_quantile_pop_front(&rs->render_time_quantile, evicted);
	}
	rolling_quantile_push_back(&rs->render_time_quantile, time_us);
}
/// How much time budget we should give to the backend for rendering, in microseconds.
///
/// A `divisor` is also returned, indicating the target framerate. The divisor is
/// the number of vblanks we should wait between each frame. A divisor of 1 means
/// full framerate, 2 means half framerate, etc.
unsigned int
render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor) {
	const bool window_full = rs->render_times.nelem >= rs->render_times.window_size;
	if (!window_full) {
		// Not enough render time samples yet for a percentile estimate;
		// assume an unlimited budget at full framerate.
		*divisor = 1;
		return UINT_MAX;
	}

	// N-th percentile of render times, see render_statistics_init for N.
	auto percentile =
	    rolling_quantile_estimate(&rs->render_time_quantile, &rs->render_times);
	// With no usable vblank estimate, optimistically target every vblank;
	// otherwise target one more vblank than the render is expected to span.
	*divisor = render_statistics_get_vblank_time(rs) == 0
	               ? 1
	               : (unsigned int)(percentile / rs->vblank_time_us.mean + 1);
	return (unsigned int)percentile;
}
/// Return the estimated time between vblanks, in microseconds; 0 means "no
/// reliable estimate yet" and callers fall back to scheduling immediately.
unsigned int render_statistics_get_vblank_time(struct render_statistics *rs) {
	if (rs->vblank_time_us.n <= 20 || rs->vblank_time_us.mean < 100) {
		// Not enough samples yet, or the mean is implausibly short for a
		// vblank interval (< 100 us would be > 10000 Hz); report that we
		// have no estimate.
		return 0;
	}
	return (unsigned int)rs->vblank_time_us.mean;
}
/// Discard all collected statistics, returning to a freshly-initialized state.
/// Allocations owned by the sub-objects are kept.
void render_statistics_reset(struct render_statistics *rs) {
	cumulative_mean_and_var_init(&rs->vblank_time_us);
	rolling_quantile_reset(&rs->render_time_quantile);
	rolling_window_reset(&rs->render_times);
}
/// Free all resources owned by the statistics object. It must be initialized
/// again with render_statistics_init() before reuse.
void render_statistics_destroy(struct render_statistics *rs) {
	render_statistics_reset(rs);
	rolling_quantile_destroy(&rs->render_time_quantile);
	rolling_window_destroy(&rs->render_times);
}

33
src/statistics.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once
#include "utils.h"
#define NTIERS (3)
/// All statistics collected about rendering, used to estimate the render time
/// budget and to pace frames.
struct render_statistics {
	/// Rolling window of render times, in microseconds.
	// NOTE(review): an earlier comment mentioned per-sample "tiers" (see
	// NTIERS above), but nothing visible here stores them — confirm.
	struct rolling_window render_times;
	/// Estimates the 98th percentile of render times (the exact quantile and
	/// tolerance are set in render_statistics_init).
	struct rolling_quantile render_time_quantile;
	/// Running mean/variance of the time between vblanks, in microseconds.
	struct cumulative_mean_and_var vblank_time_us;
};
void render_statistics_init(struct render_statistics *rs, int window_size);
void render_statistics_reset(struct render_statistics *rs);
void render_statistics_destroy(struct render_statistics *rs);
void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us);
void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us);
/// How much time budget we should give to the backend for rendering, in microseconds.
///
/// A `divisor` is also returned, indicating the target framerate. The divisor is
/// the number of vblanks we should wait between each frame. A divisor of 1 means
/// full framerate, 2 means half framerate, etc.
unsigned int
render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor);
unsigned int render_statistics_get_vblank_time(struct render_statistics *rs);

View File

@ -48,7 +48,43 @@ int next_power_of_two(int n) {
return n;
}
/// Track the rolling maximum of a stream of integers.
/// Release the backing buffer of a rolling window. The element pointer is
/// cleared after being freed, so a second call is harmless.
void rolling_window_destroy(struct rolling_window *rw) {
	int *buffer = rw->elem;
	rw->elem = NULL;
	free(buffer);
}
/// Empty the window without touching its allocation.
void rolling_window_reset(struct rolling_window *rw) {
	rw->elem_head = 0;
	rw->nelem = 0;
}
/// Set up an empty rolling window capable of holding `size` integers.
void rolling_window_init(struct rolling_window *rw, int size) {
	rw->window_size = size;
	rw->elem = ccalloc(size, int);
	rolling_window_reset(rw);
}
/// Remove and return the oldest element. The window must be non-empty.
int rolling_window_pop_front(struct rolling_window *rw) {
	assert(rw->nelem > 0);
	int front = rw->elem[rw->elem_head];
	rw->nelem -= 1;
	rw->elem_head = (rw->elem_head + 1) % rw->window_size;
	return front;
}
/// Append `val` to the window. If the window was already full, the oldest
/// element is evicted into `*front` and true is returned; otherwise `*front`
/// is left untouched and false is returned.
bool rolling_window_push_back(struct rolling_window *rw, int val, int *front) {
	const bool evicting = (rw->nelem == rw->window_size);
	if (evicting) {
		*front = rolling_window_pop_front(rw);
	}
	auto slot = (rw->elem_head + rw->nelem) % rw->window_size;
	rw->elem[slot] = val;
	rw->nelem++;
	return evicting;
}
/// Track the maximum member of a FIFO queue of integers. Integers are pushed to the back
/// and popped from the front, the maximum of the current members in the queue is
/// tracked.
struct rolling_max {
/// A priority queue holding the indices of the maximum element candidates.
/// The head of the queue is the index of the maximum element.
@ -59,32 +95,26 @@ struct rolling_max {
/// it's called the "original" indices.
int *p;
int p_head, np;
/// The elemets
int *elem;
int elem_head, nelem;
int window_size;
/// The maximum number of in flight elements.
int capacity;
};
void rolling_max_destroy(struct rolling_max *rm) {
free(rm->elem);
free(rm->p);
free(rm);
}
struct rolling_max *rolling_max_new(int size) {
struct rolling_max *rolling_max_new(int capacity) {
auto rm = ccalloc(1, struct rolling_max);
if (!rm) {
return NULL;
}
rm->p = ccalloc(size, int);
rm->elem = ccalloc(size, int);
rm->window_size = size;
if (!rm->p || !rm->elem) {
rm->p = ccalloc(capacity, int);
if (!rm->p) {
goto err;
}
rm->capacity = capacity;
return rm;
@ -96,33 +126,21 @@ err:
void rolling_max_reset(struct rolling_max *rm) {
rm->p_head = 0;
rm->np = 0;
rm->nelem = 0;
rm->elem_head = 0;
}
void rolling_max_push(struct rolling_max *rm, int val) {
#define IDX(n) ((n) % rm->window_size)
if (rm->nelem == rm->window_size) {
auto old_head = rm->elem_head;
// Discard the oldest element.
// rm->elem.pop_front();
rm->nelem--;
rm->elem_head = IDX(rm->elem_head + 1);
// Remove discarded element from the priority queue too.
assert(rm->np);
if (rm->p[rm->p_head] == old_head) {
// rm->p.pop_front()
rm->p_head = IDX(rm->p_head + 1);
rm->np--;
}
#define IDX(n) ((n) % rm->capacity)
/// Remove the oldest element in the window. The caller must maintain the list of elements
/// themselves, i.e. the behavior is undefined if `front` does not match the oldest
/// element.
void rolling_max_pop_front(struct rolling_max *rm, int front) {
if (rm->p[rm->p_head] == front) {
// rm->p.pop_front()
rm->p_head = IDX(rm->p_head + 1);
rm->np--;
}
}
// Add the new element to the queue.
// rm->elem.push_back(val)
rm->elem[IDX(rm->elem_head + rm->nelem)] = val;
rm->nelem++;
void rolling_max_push_back(struct rolling_max *rm, int val) {
// Update the prority queue.
// Remove all elements smaller than the new element from the queue. Because
// the new element will become the maximum element before them, and since they
@ -130,7 +148,7 @@ void rolling_max_push(struct rolling_max *rm, int val) {
// element, so they will never become the maximum element.
while (rm->np) {
int p_tail = IDX(rm->p_head + rm->np - 1);
if (rm->elem[rm->p[p_tail]] > val) {
if (rm->p[p_tail] > val) {
break;
}
// rm->p.pop_back()
@ -138,108 +156,119 @@ void rolling_max_push(struct rolling_max *rm, int val) {
}
// Add the new element to the end of the queue.
// rm->p.push_back(rm->start_index + rm->nelem - 1)
rm->p[IDX(rm->p_head + rm->np)] = IDX(rm->elem_head + rm->nelem - 1);
assert(rm->np < rm->capacity);
rm->p[IDX(rm->p_head + rm->np)] = val;
rm->np++;
#undef IDX
}
#undef IDX
int rolling_max_get_max(struct rolling_max *rm) {
if (rm->np == 0) {
return INT_MIN;
}
return rm->elem[rm->p[rm->p_head]];
return rm->p[rm->p_head];
}
TEST_CASE(rolling_max_test) {
#define NELEM 15
struct rolling_window queue;
rolling_window_init(&queue, 3);
auto rm = rolling_max_new(3);
const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0};
const int expected_max[NELEM] = {1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 5, 4, 3, 2};
int max[NELEM] = {0};
for (int i = 0; i < NELEM; i++) {
rolling_max_push(rm, data[i]);
int front;
bool full = rolling_window_push_back(&queue, data[i], &front);
if (full) {
rolling_max_pop_front(rm, front);
}
rolling_max_push_back(rm, data[i]);
max[i] = rolling_max_get_max(rm);
}
rolling_window_destroy(&queue);
rolling_max_destroy(rm);
TEST_TRUE(memcmp(max, expected_max, sizeof(max)) == 0);
#undef NELEM
}
/// A rolling average of a stream of integers.
struct rolling_avg {
/// The sum of the elements in the window.
int64_t sum;
// Find the k-th smallest element in an array.
int quickselect(int *elems, int nelem, int k) {
int l = 0, r = nelem; // [l, r) is the range of candidates
while (l != r) {
int pivot = elems[l];
int i = l, j = r;
while (i < j) {
while (i < j && elems[--j] >= pivot) {
}
elems[i] = elems[j];
while (i < j && elems[++i] <= pivot) {
}
elems[j] = elems[i];
}
elems[i] = pivot;
/// The elements in the window.
int *elem;
int head, nelem;
if (i == k) {
break;
}
int window_size;
};
struct rolling_avg *rolling_avg_new(int size) {
auto rm = ccalloc(1, struct rolling_avg);
if (!rm) {
return NULL;
if (i < k) {
l = i + 1;
} else {
r = i;
}
}
rm->elem = ccalloc(size, int);
rm->window_size = size;
if (!rm->elem) {
free(rm);
return NULL;
}
return rm;
return elems[k];
}
void rolling_avg_destroy(struct rolling_avg *rm) {
free(rm->elem);
free(rm);
void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk) {
*rq = (struct rolling_quantile){0};
rq->tmp_buffer = malloc(sizeof(int) * (size_t)capacity);
rq->capacity = capacity;
rq->min_target_rank = mink;
rq->max_target_rank = maxk;
}
void rolling_avg_reset(struct rolling_avg *ra) {
ra->sum = 0;
ra->nelem = 0;
ra->head = 0;
/// Initialize a rolling quantile estimator for quantile `target` (in [0, 1]),
/// accepting any rank within `tolerance` of it.
void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size,
                                          double target, double tolerance) {
	auto mink = (int)((target - tolerance) * window_size);
	auto maxk = (int)((target + tolerance) * window_size);
	rolling_quantile_init(rq, window_size, mink, maxk);
}
void rolling_avg_push(struct rolling_avg *ra, int val) {
if (ra->nelem == ra->window_size) {
// Discard the oldest element.
// rm->elem.pop_front();
ra->sum -= ra->elem[ra->head % ra->window_size];
ra->nelem--;
ra->head = (ra->head + 1) % ra->window_size;
/// Forget the current estimate; the next call to rolling_quantile_estimate
/// will recompute it from scratch.
void rolling_quantile_reset(struct rolling_quantile *rq) {
	rq->estimate = 0;
	rq->current_rank = 0;
}
/// Free the scratch buffer used for quantile re-estimation. The pointer is
/// cleared after being freed, matching rolling_window_destroy, so a repeated
/// destroy cannot double-free.
void rolling_quantile_destroy(struct rolling_quantile *rq) {
	free(rq->tmp_buffer);
	rq->tmp_buffer = NULL;
}
int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements) {
if (rq->current_rank < rq->min_target_rank || rq->current_rank > rq->max_target_rank) {
if (elements->nelem != elements->window_size) {
return INT_MIN;
}
// Re-estimate the quantile.
assert(elements->nelem <= rq->capacity);
rolling_window_copy_to_array(elements, rq->tmp_buffer);
const int target_rank =
rq->min_target_rank + (rq->max_target_rank - rq->min_target_rank) / 2;
rq->estimate = quickselect(rq->tmp_buffer, elements->nelem, target_rank);
rq->current_rank = target_rank;
}
// Add the new element to the queue.
// rm->elem.push_back(val)
ra->elem[(ra->head + ra->nelem) % ra->window_size] = val;
ra->sum += val;
ra->nelem++;
return rq->estimate;
}
double rolling_avg_get_avg(struct rolling_avg *ra) {
if (ra->nelem == 0) {
return 0;
void rolling_quantile_push_back(struct rolling_quantile *rq, int x) {
if (x <= rq->estimate) {
rq->current_rank++;
}
return (double)ra->sum / (double)ra->nelem;
}
TEST_CASE(rolling_avg_test) {
#define NELEM 15
auto rm = rolling_avg_new(3);
const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0};
const double expected_avg[NELEM] = {
1, 1.5, 2, 2, 8.0 / 3.0, 10.0 / 3.0, 11.0 / 3.0, 10.0 / 3.0,
11.0 / 3.0, 14.0 / 3.0, 5, 4, 3, 5.0 / 3.0, 2.0 / 3.0};
double avg[NELEM] = {0};
for (int i = 0; i < NELEM; i++) {
rolling_avg_push(rm, data[i]);
avg[i] = rolling_avg_get_avg(rm);
}
for (int i = 0; i < NELEM; i++) {
TEST_EQUAL(avg[i], expected_avg[i]);
/// Account for the removal of element `x` (the oldest in the window): if it
/// ranked at or below the current estimate, the estimate's rank shifts down.
void rolling_quantile_pop_front(struct rolling_quantile *rq, int x) {
	const bool below_estimate = (x <= rq->estimate);
	if (below_estimate) {
		rq->current_rank -= 1;
	}
}

View File

@ -17,6 +17,7 @@
#include <time.h>
#include "compiler.h"
#include "log.h"
#include "types.h"
#define ARR_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
@ -289,20 +290,97 @@ static inline void free_charpp(char **str) {
///
int next_power_of_two(int n);
/// A fixed-capacity FIFO of ints stored in a circular buffer. Elements are
/// pushed at the back and popped from the front.
struct rolling_window {
	/// Backing storage; holds `window_size` ints, owned by this struct.
	int *elem;
	/// Index of the oldest element, and the current element count.
	int elem_head, nelem;
	/// Capacity of `elem`.
	int window_size;
};
void rolling_window_destroy(struct rolling_window *rw);
void rolling_window_reset(struct rolling_window *rw);
void rolling_window_init(struct rolling_window *rw, int size);
int rolling_window_pop_front(struct rolling_window *rw);
bool rolling_window_push_back(struct rolling_window *rw, int val, int *front);
/// Copy the elements of the rolling window, oldest first, into `arr`. The
/// caller must provide at least `rw->nelem` ints of space.
static inline void attr_unused rolling_window_copy_to_array(struct rolling_window *rw,
                                                            int *arr) {
	auto total = (size_t)rw->nelem;
	// Number of live elements stored contiguously from the head up to the
	// end of the backing buffer.
	auto contiguous = (size_t)(rw->window_size - rw->elem_head);
	if (contiguous >= total) {
		// No wrap-around; a single copy suffices.
		memcpy(arr, rw->elem + rw->elem_head, sizeof(int) * total);
	} else {
		// Wrapped: copy head..end, then the part that wrapped to the start.
		memcpy(arr, rw->elem + rw->elem_head, sizeof(int) * contiguous);
		memcpy(arr + contiguous, rw->elem, sizeof(int) * (total - contiguous));
	}
}
struct rolling_max;
struct rolling_max *rolling_max_new(int window_size);
struct rolling_max *rolling_max_new(int capacity);
void rolling_max_destroy(struct rolling_max *rm);
void rolling_max_reset(struct rolling_max *rm);
void rolling_max_push(struct rolling_max *rm, int val);
void rolling_max_pop_front(struct rolling_max *rm, int front);
void rolling_max_push_back(struct rolling_max *rm, int val);
int rolling_max_get_max(struct rolling_max *rm);
struct rolling_avg;
struct rolling_avg *rolling_avg_new(int window_size);
void rolling_avg_destroy(struct rolling_avg *ra);
void rolling_avg_reset(struct rolling_avg *ra);
void rolling_avg_push(struct rolling_avg *ra, int val);
double rolling_avg_get_avg(struct rolling_avg *ra);
/// Estimate the mean and variance of random variable X using Welford's online
/// algorithm.
struct cumulative_mean_and_var {
	/// Running mean of the samples seen so far.
	double mean;
	/// Sum of squared deviations from the running mean (Welford's M2).
	double m2;
	/// Number of samples accumulated.
	unsigned int n;
};
/// Reset the accumulator to its empty state (no samples).
static inline attr_unused void
cumulative_mean_and_var_init(struct cumulative_mean_and_var *cmv) {
	cmv->mean = 0.0;
	cmv->m2 = 0.0;
	cmv->n = 0;
}
/// Fold one sample `x` into the running mean/variance (Welford's single-pass
/// update). The statement order is significant: `delta` uses the old mean,
/// while the m2 update uses the freshly updated mean.
static inline attr_unused void
cumulative_mean_and_var_update(struct cumulative_mean_and_var *cmv, double x) {
	if (cmv->n == UINT_MAX) {
		// The sample counter is saturated; stop updating rather than let
		// it wrap and corrupt the statistics.
		return;
	}
	cmv->n++;
	double delta = x - cmv->mean;
	cmv->mean += delta / (double)cmv->n;
	cmv->m2 += delta * (x - cmv->mean);
}
/// Sample variance of the accumulated samples (Bessel-corrected, m2 / (n-1)).
/// Returns 0 when fewer than two samples have been collected.
static inline attr_unused double
cumulative_mean_and_var_get_var(struct cumulative_mean_and_var *cmv) {
	return cmv->n < 2 ? 0.0 : cmv->m2 / (double)(cmv->n - 1);
}
// Find the k-th smallest element in an array.
int quickselect(int *elems, int nelem, int k);
/// A naive quantile estimator.
///
/// Estimates a quantile of a random variable X in a sliding window. The
/// estimate is only recomputed (quickselect over a copy of the window) when
/// the tracked rank drifts outside [min_target_rank, max_target_rank].
struct rolling_quantile {
	/// Rank of `estimate` within the current window, maintained incrementally
	/// by rolling_quantile_push_back/pop_front.
	int current_rank;
	/// Acceptable range for `current_rank`; outside it, the estimate is redone.
	int min_target_rank, max_target_rank;
	/// The current quantile estimate (a past element of the window).
	int estimate;
	/// Maximum number of elements; also the size of `tmp_buffer`.
	int capacity;
	/// Scratch space for quickselect, `capacity` ints.
	int *tmp_buffer;
};
void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk);
void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size,
double target, double tolerance);
void rolling_quantile_reset(struct rolling_quantile *rq);
void rolling_quantile_destroy(struct rolling_quantile *rq);
int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements);
void rolling_quantile_push_back(struct rolling_quantile *rq, int x);
void rolling_quantile_pop_front(struct rolling_quantile *rq, int x);
// Some versions of the Android libc do not have timespec_get(), use
// clock_gettime() instead.