minor optimization: allocate worker array at runtime

This makes it possible to increase LZ4_NBWORKERS_MAX without additional memory cost.
This commit is contained in:
Yann Collet 2024-07-06 04:05:28 -07:00
parent 04341f1974
commit 79e72beeb5
3 changed files with 12 additions and 6 deletions

View File

@ -35,14 +35,14 @@
/* Determines default nb of threads for compression /* Determines default nb of threads for compression
* Default value is 0, which means "auto" : * Default value is 0, which means "auto" :
* nb of threads is determined from detected local cpu. * nb of threads is determined from detected local cpu.
* Can also be selected at runtime using -T# command */ * Can be overridden at runtime using -T# command */
#ifndef LZ4_NBWORKERS_DEFAULT #ifndef LZ4_NBWORKERS_DEFAULT
# define LZ4_NBWORKERS_DEFAULT 0 # define LZ4_NBWORKERS_DEFAULT 0
#endif #endif
/* Maximum nb of compression threads that can selected at runtime */ /* Maximum nb of compression threads selectable at runtime */
#ifndef LZ4_NBWORKERS_MAX #ifndef LZ4_NBWORKERS_MAX
# define LZ4_NBWORKERS_MAX 125 # define LZ4_NBWORKERS_MAX 200
#endif #endif
/* Determines default lz4 block size when none provided. /* Determines default lz4 block size when none provided.

View File

@ -121,7 +121,7 @@ static void LZ4IO_finalTimeDisplay(TIME_t timeStart, clock_t cpuStart, unsigned
double const seconds = (double)(duration_ns + !duration_ns) / (double)1000000000.; double const seconds = (double)(duration_ns + !duration_ns) / (double)1000000000.;
double const cpuLoad_s = (double)(clock() - cpuStart) / CLOCKS_PER_SEC; double const cpuLoad_s = (double)(clock() - cpuStart) / CLOCKS_PER_SEC;
DISPLAYLEVEL(3,"Done in %.2f s ==> %.2f MiB/s (cpu load : %.0f%%)\n", seconds, DISPLAYLEVEL(3,"Done in %.2f s ==> %.2f MiB/s (cpu load : %.0f%%)\n", seconds,
size / seconds / 1024 / 1024, (double)size / seconds / 1024. / 1024.,
(cpuLoad_s / seconds) * 100.); (cpuLoad_s / seconds) * 100.);
} }
} }

View File

@ -75,7 +75,7 @@ void TPOOL_completeJobs(TPOOL_ctx* ctx) {
typedef struct TPOOL_ctx_s { typedef struct TPOOL_ctx_s {
HANDLE completionPort; HANDLE completionPort;
HANDLE workerThreads[LZ4_NBWORKERS_MAX]; HANDLE* workerThreads;
int nbWorkers; int nbWorkers;
int queueSize; int queueSize;
LONG numPendingJobs; LONG numPendingJobs;
@ -97,6 +97,7 @@ void TPOOL_free(TPOOL_ctx* ctx) {
for (int i = 0; i < ctx->nbWorkers; i++) { for (int i = 0; i < ctx->nbWorkers; i++) {
CloseHandle(ctx->workerThreads[i]); CloseHandle(ctx->workerThreads[i]);
} }
free(ctx->workerThreads);
CloseHandle(ctx->completionPort); CloseHandle(ctx->completionPort);
/* Clean up synchronization objects */ /* Clean up synchronization objects */
@ -145,7 +146,7 @@ static DWORD WINAPI WorkerThread(LPVOID lpParameter) {
TPOOL_ctx* TPOOL_create(int nbWorkers, int queueSize) TPOOL_ctx* TPOOL_create(int nbWorkers, int queueSize)
{ {
TPOOL_ctx* const ctx = malloc(sizeof(TPOOL_ctx)); TPOOL_ctx* const ctx = calloc(1, sizeof(TPOOL_ctx));
if (!ctx) return NULL; if (!ctx) return NULL;
/* parameters sanitization */ /* parameters sanitization */
@ -164,6 +165,11 @@ TPOOL_ctx* TPOOL_create(int nbWorkers, int queueSize)
/* Create worker threads */ /* Create worker threads */
ctx->nbWorkers = nbWorkers; ctx->nbWorkers = nbWorkers;
ctx->workerThreads = (HANDLE*)malloc(sizeof(HANDLE) * nbWorkers);
if (ctx->workerThreads == NULL) {
TPOOL_free(ctx);
return NULL;
}
for (int i = 0; i < nbWorkers; i++) { for (int i = 0; i < nbWorkers; i++) {
ctx->workerThreads[i] = CreateThread(NULL, 0, WorkerThread, ctx, 0, NULL); ctx->workerThreads[i] = CreateThread(NULL, 0, WorkerThread, ctx, 0, NULL);
if (!ctx->workerThreads[i]) { if (!ctx->workerThreads[i]) {