mirror of
https://github.com/lz4/lz4.git
synced 2024-11-23 18:04:05 +08:00
227 lines
7.1 KiB
C
227 lines
7.1 KiB
C
/*
|
|
lorem.c - lorem ipsum generator
|
|
Copyright (C) Yann Collet 2024
|
|
|
|
GPL v2 License
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
You can contact the author at :
|
|
- LZ4 source repository : https://github.com/lz4/lz4
|
|
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
*/
|
|
|
|
/* Implementation notes:
 *
 * This is a very simple lorem ipsum generator
 * which features a static list of words
 * and prints them one after another randomly
 * with a fake sentence / paragraph structure.
 *
 * It would be possible to create some more complex scheme,
 * notably by enlarging the dictionary of words with a word generator,
 * inventing grammatical rules (composition) and syntax rules.
 * But that's probably overkill for the intended goal.
 *
 * The goal is to generate a text which can be printed,
 * and can be used to fake a text compression scenario.
 * The resulting compression / ratio curve of the lorem ipsum generator
 * is more satisfying than the existing statistical generator,
 * which was initially designed for entropy compression,
 * but lacks a regularity more representative of text,
 * which more properly tests the LZ compression part.
 */
|
|
|
|
#include "platform.h" /* Compiler options, SET_BINARY_MODE */
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include <assert.h>
|
|
|
|
#define WORD_MAX_SIZE 20

/* Vocabulary used by the generator.
 * The first 19 entries, taken in order, form the fixed opening sentence
 * emitted by generateFirstSentence(); every other sentence picks entries
 * at random from the whole table. */
const char *words[] = {
    "lorem",       "ipsum",       "dolor",       "sit",          "amet",
    "consectetur", "adipiscing",  "elit",        "sed",          "do",
    "eiusmod",     "tempor",      "incididunt",  "ut",           "labore",
    "et",          "dolore",      "magna",       "aliqua",       "dis",
    "lectus",      "vestibulum",  "mattis",      "ullamcorper",  "velit",
    "commodo",     "a",           "lacus",       "arcu",         "magnis",
    "parturient",  "montes",      "nascetur",    "ridiculus",    "mus",
    "mauris",      "nulla",       "malesuada",   "pellentesque", "eget",
    "gravida",     "in",          "dictum",      "non",          "erat",
    "nam",         "voluptat",    "maecenas",    "blandit",      "aliquam",
    "etiam",       "enim",        "lobortis",    "scelerisque",  "fermentum",
    "dui",         "faucibus",    "ornare",      "at",           "elementum",
    "eu",          "facilisis",   "odio",        "morbi",        "quis",
    "eros",        "donec",       "ac",          "orci",         "purus",
    "turpis",      "cursus",      "leo",         "vel",          "porta"
};

/* Number of entries in words[], derived from the table itself. */
const int wordCount = (int)(sizeof words / sizeof words[0]);
|
|
|
|
/* Generator state, shared by all helpers of this unit.
 * Because the state lives in file-scope variables, this unit only works
 * when invoked sequentially: no concurrent access is allowed. */
static size_t g_maxChars = 10000000; /* capacity of the destination buffer */
static size_t g_nbChars = 0;         /* number of characters written so far */
static char *g_ptr = NULL;           /* destination buffer of the current run */
|
|
|
|
/* State of the pseudo-random generator; LOREM_rand() updates it on each call. */
static unsigned g_randRoot = 0;

/* Rotate the 32-bit value `x` left by `r` bits (`r` expected in [1, 31]).
 * Every argument is parenthesized so that compound expressions such as
 * `a | b` are rotated as a whole instead of being split by precedence. */
#define RDG_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))

/* LOREM_rand():
 * Advance the generator state and return a pseudo-random value scaled
 * into [0, range) (assuming a 32-bit `unsigned`).
 * The result is fully determined by the current value of g_randRoot. */
static unsigned LOREM_rand(unsigned range)
{
    static const unsigned prime1 = 2654435761U;
    static const unsigned prime2 = 2246822519U;
    unsigned rand32 = g_randRoot;

    rand32 *= prime1;
    rand32 ^= prime2;
    rand32 = RDG_rotl32(rand32, 13);
    g_randRoot = rand32;

    /* multiply-and-shift maps the 32-bit value onto [0, range) without a modulo */
    return (unsigned)(((unsigned long long)rand32 * range) >> 32);
}
|
|
|
|
static void writeLastCharacters(void) {
|
|
size_t lastChars = g_maxChars - g_nbChars;
|
|
assert(g_maxChars >= g_nbChars);
|
|
if (lastChars == 0)
|
|
return;
|
|
g_ptr[g_nbChars++] = '.';
|
|
if (lastChars > 1) {
|
|
memset(g_ptr + g_nbChars, ' ', lastChars - 1);
|
|
}
|
|
g_nbChars = g_maxChars;
|
|
}
|
|
|
|
static void generateWord(const char *word, const char *separator, int upCase)
|
|
{
|
|
size_t const len = strlen(word) + strlen(separator);
|
|
if (g_nbChars + len > g_maxChars) {
|
|
writeLastCharacters();
|
|
return;
|
|
}
|
|
memcpy(g_ptr + g_nbChars, word, strlen(word));
|
|
if (upCase) {
|
|
static const char toUp = 'A' - 'a';
|
|
g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
|
|
}
|
|
g_nbChars += strlen(word);
|
|
memcpy(g_ptr + g_nbChars, separator, strlen(separator));
|
|
g_nbChars += strlen(separator);
|
|
}
|
|
|
|
/* about():
 * Return 1 plus the sum of two successive LOREM_rand(target) draws. */
static int about(unsigned target)
{
    unsigned const firstDraw = LOREM_rand(target);
    unsigned const secondDraw = LOREM_rand(target);
    return (int)(firstDraw + secondDraw + 1);
}
|
|
|
|
/* Function to generate a random sentence */
|
|
static void generateSentence(int nbWords) {
|
|
int commaPos = about(9);
|
|
int comma2 = commaPos + about(7);
|
|
int i;
|
|
for (i = 0; i < nbWords; i++) {
|
|
const char *word = words[LOREM_rand(wordCount)];
|
|
const char* sep = " ";
|
|
if (i == commaPos)
|
|
sep = ", ";
|
|
if (i == comma2)
|
|
sep = ", ";
|
|
if (i == nbWords - 1)
|
|
sep = ". ";
|
|
generateWord(word, sep, i==0);
|
|
}
|
|
}
|
|
|
|
static void generateParagraph(int nbSentences) {
|
|
int i;
|
|
for (i = 0; i < nbSentences; i++) {
|
|
int wordsPerSentence = about(8);
|
|
generateSentence(wordsPerSentence);
|
|
}
|
|
if (g_nbChars < g_maxChars) {
|
|
g_ptr[g_nbChars++] = '\n';
|
|
}
|
|
if (g_nbChars < g_maxChars) {
|
|
g_ptr[g_nbChars++] = '\n';
|
|
}
|
|
}
|
|
|
|
/* It's "common" for lorem ipsum generators to start with the same first
|
|
* pre-defined sentence */
|
|
static void generateFirstSentence(void) {
|
|
int i;
|
|
for (i = 0; i < 18; i++) {
|
|
const char *word = words[i];
|
|
const char *separator = " ";
|
|
if (i == 4)
|
|
separator = ", ";
|
|
if (i == 7)
|
|
separator = ", ";
|
|
generateWord(word, separator, i==0);
|
|
}
|
|
generateWord(words[18], ". ", 0);
|
|
}
|
|
|
|
static size_t
|
|
LOREM_genBlock(void *buffer, size_t size,
|
|
unsigned seed,
|
|
int first, int fill)
|
|
{
|
|
g_ptr = (char*)buffer;
|
|
assert(size < INT_MAX);
|
|
g_maxChars = size;
|
|
g_nbChars = 0;
|
|
g_randRoot = seed;
|
|
if (first) {
|
|
generateFirstSentence();
|
|
}
|
|
while (g_nbChars < g_maxChars) {
|
|
int sentencePerParagraph = about(6);
|
|
generateParagraph(sentencePerParagraph);
|
|
if (!fill)
|
|
break; /* only generate one paragraph in not-fill mode */
|
|
}
|
|
g_ptr = NULL;
|
|
return g_nbChars;
|
|
}
|
|
|
|
/* LOREM_genBuffer():
 * Fill `buffer` with `size` bytes of lorem ipsum text derived from `seed`.
 * The text starts with the pre-defined first sentence.
 * No terminating NUL character is appended. */
void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
{
    int const startWithFirstSentence = 1;
    int const fillWholeBuffer = 1;

    (void)LOREM_genBlock(buffer, size, seed,
                         startWithFirstSentence, fillWholeBuffer);
}
|
|
|
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define LOREM_BLOCKSIZE (1 << 10)

/* LOREM_genOut():
 * Write `size` bytes of lorem ipsum text to stdout.
 * The text is produced block by block (at most LOREM_BLOCKSIZE bytes each),
 * one paragraph per block, using `seed` for the first block and incrementing
 * it for each following one. Only the very first block starts with the
 * pre-defined first sentence.
 * If writing to stdout fails, an error is reported on stderr and generation
 * stops, leaving the output shorter than `size`. */
void LOREM_genOut(unsigned long long size, unsigned seed)
{
    char buff[LOREM_BLOCKSIZE + 1] = {0};
    unsigned long long total = 0;
    size_t genBlockSize = (size_t)MIN(size, LOREM_BLOCKSIZE);

    /* init */
    SET_BINARY_MODE(stdout);

    /* Generate data, per blocks */
    while (total < size) {
        size_t const generated =
            LOREM_genBlock(buff, genBlockSize, seed++, total == 0, 0);
        assert(generated <= genBlockSize);
        total += generated;
        assert(total <= size);

        /* a short count means stdout is broken: continuing would only
         * burn CPU while silently producing truncated output */
        if (fwrite(buff, 1, generated, stdout) != generated) {
            perror("LOREM_genOut: cannot write to stdout");
            return;
        }

        /* never generate more than what is still requested */
        if (size - total < genBlockSize)
            genBlockSize = (size_t)(size - total);
    }
    assert(total == size);
}
|