--- NEW FILE: bitset.cpp ---
/*
* Copyright (c) 2002, International Business Machines
* Corporation and others.  All Rights Reserved.
* $Source: /usr/local/cvsroot/icu-sword/source/test/usetperf/bitset.cpp,v $
* 2002-09-20 aliu Created.
*/

#include "unicode/utypes.h"
#include "cmemory.h"
#include "bitset.h"

// TODO: have a separate capacity, so the len can just be set to
// zero in the clearAll() method, and growth can be smarter.

// Number of 32-bit words the BitSet constructor allocates up front.
static const int32_t SLOP = 8;

// Width in bytes of one storage word; used to size allocations and copies.
static const int32_t BYTES_PER_WORD = (int32_t) sizeof(int32_t);

/**
 * Construct an empty bit set with room for SLOP words (SLOP * 32 bits).
 *
 * The storage is zeroed so that every bit initially reads as unset;
 * uprv_malloc() does not clear the memory it returns.  If the allocation
 * fails the set is left with zero capacity instead of a dangling length.
 */
BitSet::BitSet() {
    len = SLOP;
    data = (int32_t*) uprv_malloc(len * BYTES_PER_WORD);
    if (data == NULL) {
        len = 0; // nothing was allocated, so no word may be indexed
    }
    clearAll();
}

/**
 * Release the word buffer obtained from uprv_malloc().
 */
BitSet::~BitSet() {
    uprv_free(data);
}

/**
 * Test a single bit.
 *
 * @param bitIndex zero-based index of the bit to test
 * @return TRUE if the bit is set; FALSE if it is clear, if bitIndex is
 *         negative, or if it lies beyond the words currently allocated
 */
UBool BitSet::get(int32_t bitIndex) const {
    if (bitIndex < 0) {
        // Make the out-of-range answer explicit rather than relying on how
        // a negative value happens to shift and convert to unsigned.
        return FALSE;
    }
    uint32_t wordIndex = (uint32_t) bitIndex >> 5;      // 32 bits per word
    uint32_t mask = (uint32_t) 1 << (bitIndex & 0x1F);  // position inside the word
    return (wordIndex < len) && (((uint32_t) data[wordIndex] & mask) != 0);
}

void BitSet::set(int32_t bitIndex) {
    uint32_t longIndex = bitIndex >> 5;
    int32_t bitInLong = bitIndex & 0x1F;
    if (longIndex >= len) {
    data[longIndex] |= (1 << bitInLong);

/**
 * Clear every bit by zeroing all allocated words.  The capacity (len) is
 * left unchanged.
 */
void BitSet::clearAll() {
    for (uint32_t i = 0; i < len; ++i) {
        data[i] = 0;
    }
}

void BitSet::ensureCapacity(uint32_t minLen) {
    uint32_t newLen = len;
    while (newLen < minLen) newLen <<= 1; // grow exponentially
    int32_t* newData = (int32_t*) uprv_malloc(newLen * BYTES_PER_WORD);
    uprv_memcpy(newData, data, len * BYTES_PER_WORD);
    data = newData;
    int32_t* p = data + len;
    int32_t* limit = data + newLen;
    while (p < limit) *p++ = 0;
    len = newLen;


--- NEW FILE: bitset.h ---
/*
* Copyright (c) 2002, International Business Machines
* Corporation and others.  All Rights Reserved.
* $Source: /usr/local/cvsroot/icu-sword/source/test/usetperf/bitset.h,v $
* 2002-09-20 aliu Created.
*/
#ifndef __BITSET_H__
#define __BITSET_H__

#include "unicode/utypes.h"

/**
 * A simple, limited clone of the java.util.BitSet.
 *
 * Bits are stored 32 to a word in a heap buffer that grows on demand when
 * a bit beyond the current capacity is set.
 */
class BitSet {

    // Number of 32-bit words currently allocated in data.
    uint32_t len;
    // Heap buffer of len words holding the bits.
    int32_t* data;

    // Grow data so that it holds at least minLen words.
    void ensureCapacity(uint32_t minLen);

    // Declared but not implemented: the object holds a raw pointer to a
    // malloc'd buffer, so a member-wise copy would alias that buffer.
    BitSet(const BitSet& other);
    BitSet& operator=(const BitSet& other);

public:

    BitSet();
    ~BitSet();

    // Return TRUE if the bit at bitIndex is set.
    UBool get(int32_t bitIndex) const;

    // Set the bit at bitIndex.
    void set(int32_t bitIndex);

    // Non-java
    // Clear every bit.
    void clearAll();

    // TODO add other methods as needed.
};

#endif // __BITSET_H__


--- NEW FILE: timer.h ---
/*
* Copyright (c) 2002, International Business Machines
* Corporation and others.  All Rights Reserved.
* $Source: /usr/local/cvsroot/icu-sword/source/test/usetperf/timer.h,v $
* 2002-09-20 aliu Created.
*/
#ifndef __PERFTIMER_H__
#define __PERFTIMER_H__

#include "unicode/utypes.h"

// Derived from Ram's perftime.h

// Win32

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

#include <windows.h>

/**
 * Wall-clock stopwatch built on the Win32 performance counter.
 *
 * Call start(), run the code to be measured, then call stop() to obtain
 * the elapsed time in seconds.
 */
class Timer {
    LARGE_INTEGER tstart, tend;
public:
    // Start timing at construction so that stop() never reads an
    // uninitialized start value.
    Timer() { start(); }

    // Record the starting counter value.
    inline void start() {
        QueryPerformanceCounter(&tstart);
    }

    // Return the seconds elapsed since the last start(), or 0.0 if the
    // counter frequency cannot be obtained.
    inline double stop() {
        QueryPerformanceCounter(&tend);
        LARGE_INTEGER freq;
        if (!QueryPerformanceFrequency(&freq) || freq.QuadPart == 0) {
            return 0.0; // no usable frequency; avoid dividing by garbage
        }
        return ((double)(tend.QuadPart - tstart.QuadPart))/((double)freq.QuadPart);
    }
};

#else

// UNIX

#include <stddef.h>
#include <sys/time.h>

/**
 * Wall-clock stopwatch built on gettimeofday().
 *
 * Call start(), run the code to be measured, then call stop() to obtain
 * the elapsed time in seconds.
 */
class Timer {
    struct timeval tstart, tend;
public:
    // Start timing at construction so that stop() never reads an
    // uninitialized start value.
    Timer() { start(); }

    // Record the starting time.  The timezone argument of gettimeofday()
    // is obsolete, so NULL is passed.
    inline void start() {
        gettimeofday(&tstart, NULL);
    }

    // Return the seconds elapsed since the last start().
    inline double stop() {
        gettimeofday(&tend, NULL);
        const double t1 = (double)tstart.tv_sec + (double)tstart.tv_usec*1e-6;
        const double t2 = (double)tend.tv_sec + (double)tend.tv_usec*1e-6;
        return t2-t1;
    }
};

#endif

#endif // __PERFTIMER_H__


--- NEW FILE: usetperf.cpp ---
/*
* Copyright (c) 2002, International Business Machines
* Corporation and others.  All Rights Reserved.
* $Source: /usr/local/cvsroot/icu-sword/source/test/usetperf/usetperf.cpp,v $
* 2002-09-20 aliu Created.
*/

#include <stdio.h>

#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/usetiter.h"
#include "bitset.h"
#include "timer.h"

// Number of elements in the array a.  Only valid for true arrays, not for
// pointers.  The argument is parenthesized so that any array-valued
// expression may be passed.
#define LENGTH(a) (sizeof(a)/sizeof((a)[0]))

int main(int argc, const char *argv[]) {

    Timer timer;
    BitSet bs;
    UnicodeSet us;
    int32_t i, j, n, temp;
    UChar32 cp;
    double t;

    int32_t PROPS[] = {
        // category         iterations for add, contains, iterator
        U_TITLECASE_LETTER, 100, 100, 20000000,
        U_UNASSIGNED,       30, 100, 20000000,

    for (j=0; j<LENGTH(PROPS); j+=4) {
        UCharCategory prop = (UCharCategory) PROPS[j];

        printf("\nGetting characters for character category %d\n", prop);
        int32_t total = 0;
        for (cp=0; cp<0x110000; ++cp) {
            if (u_charType(cp) == prop) {
                bs.set((int32_t) cp);
        printf("Total characters: %d\n", total);
        // add()
        n = PROPS[j+1];
        printf("Testing add() x %d...", n);
        for (i=0; i<n; ++i) {
            for (cp=0; cp<0x110000; ++cp) {
                if (bs.get((int32_t) cp)) {
        t = timer.stop();
        printf("result: %f sec => %f ms/loop\n", t, t*1e3/n);

        // contains()
        n = PROPS[j+2];
        printf("Testing contains() x %d...", n);
        temp = 0;
        for (i=0; i<n; ++i) {
            for (cp=0; cp<0x110000; ++cp) {
                if (us.contains(cp)) {
                    temp += cp;
        t = timer.stop();
        printf("result: %f sec => %f ms/loop\n", t, t*1e3/n);
        // iterator
        n = PROPS[j+3];
        printf("Testing iterator x %d...", n);
        temp = 0;
        for (i=0; i<n; ++i) {
            UnicodeSetIterator uit(us);
            while (uit.next()) {
                temp += uit.getCodepoint();
        t = timer.stop();
        printf("result: %f sec => %f ns/loop\n", t, t*1e9/n);

    char* PAT[] = {



    UErrorCode ec = U_ZERO_ERROR;

    n = 2000;

    for (j=0; j<LENGTH(PAT); ++j) {

        printf("\nApplying pattern %s x %d...", PAT[j], n);
        UnicodeSet set;
        UnicodeString pat(PAT[j], "");

        for (i=0; i<n; i++) {
            set.applyPattern(pat, ec);
        t = timer.stop();
        printf("result: %f sec => %f us/loop\n", t, t*1e6/n);

    return 0;

