the Compartmented Robust Posix C++ Unit Test system

Collating strings using a locale

Collating strings means comparing their sort order according to the rules of a locale. The result of a comparison may differ depending on the locale used.

With crpcut it is easy to make the comparison. The result of crpcut::collate can be compared against a string using the comparison operators <, <=, !=, ==, >=, and > using the ASSERT_PRED(pred, ...) or the ASSERT_TRUE(expr) macro.

[Note]Note
Operator == in a collation check does not mean strict equality, but rather that the left hand side and right hand side are equivalent in the sort order defined by the locale.

Consider this class that can represent a range of names sorted in a desired locale:

     
     #include <algorithm>
     #include <locale>
     #include <set>
     #include <string>
     
     /**
      * A collection of names that is kept sorted according to the
      * collation rules of the locale named by locname.
      *
      * locname is a reference template parameter, so the argument must be
      * a 'const char *' variable with static storage duration. The string
      * it points to is passed to the std::locale constructor, which throws
      * std::runtime_error if the name is not recognised by the system.
      */
     template <const char *(&locname)>
     class sorted_names
     {
       /**
        * Ordering predicate for the underlying multiset: true when lh
        * collates strictly before rh in the selected locale.
        */
       class comparator
       {
         typedef std::collate<char> coll_t;
       public:
         comparator() : loc(locname) {}
         bool operator()(const std::string &lh, const std::string &rh) const
         {
           const coll_t &coll = std::use_facet<coll_t>(loc);
           return coll.compare(lh.c_str(), lh.c_str()+lh.length(),
                               rh.c_str(), rh.c_str()+rh.length()) < 0;
         }
         std::locale loc;
       };
       // A multiset is used so that names that compare equivalent in the
       // locale are all retained.
       typedef std::multiset<std::string, comparator> collection;
     public:
       typedef typename collection::const_iterator iterator;
       /** Adds name to the collection at its sorted position. */
       void push(std::string name)
       {
         names.insert(name);
       }
       /** Returns an iterator to the first name in collation order. */
       iterator begin() const
       {
         return names.begin();
       }
       /**
        * Returns the past-the-end iterator. Declared const, like begin(),
        * so that a const sorted_names can be iterated.
        */
       iterator end() const
       {
         return names.end();
       }
     private:
       collection names;
     };
     

Working with locales in C++ is a headache, but writing the test program with crpcut isn't too bad:

     
     #include <crpcut.hpp>
     #include "sorted-names.hpp"
     
     // UTF-8 byte sequences for the letters å, ä, ö, Å, Ä and Ö, in that
     // order. They are string literals so that they can be joined with
     // adjacent literals, e.g. Auml "ngla".
     #define aring "\xc3\xa5"
     #define auml  "\xc3\xa4"
     #define ouml  "\xc3\xb6"
     #define Aring "\xc3\x85"
     #define Auml  "\xc3\x84"
     #define Ouml  "\xc3\x96"
     
     template <const char *(&locname)>
     class name_fixture
     {
     protected:
       name_fixture()
       {
         names.push(Auml "ngla");
         names.push(Ouml "rjan");
         names.push(Auml "rlig");
         names.push("Bj" ouml "rn");
       }
       sorted_names<locname> names;
     };
     
     /**
      * Checks that a sequence of strings is in non-descending collation
      * order for the locale named by locname.
      */
     template <const char *(&locname)>
     class sort_checker
     {
     public:
       /**
        * Walks the range [b, e) and, for every pair of adjacent elements,
        * logs the pair with INFO and asserts with ASSERT_PRED that the
        * first collates less than or equal to the second.
        *
        * A range with fewer than two elements has no adjacent pair, so
        * nothing is logged or asserted for it.
        */
       template <typename iter>
       static void verify(iter b, iter e)
       {
         // Guard against an empty range: the b++ below would otherwise
         // advance an iterator that is already at the end.
         if (b == e)
           {
             return;
           }
         iter i = b++;
         while (b != e)
           {
             INFO << *i << "<=" << *b;
             ASSERT_PRED(crpcut::collate(*i, std::locale(locname)) <= *b);
             i = b++;
           }
       }
     };
     
     // Locale names used as template arguments. The templates above take a
     // reference to a non-const 'const char *', so these have to be plain
     // pointer variables (not 'const char *const') with static storage.
     const char *sv_SE = "sv_SE.utf8";
     const char *de_DE = "de_DE.utf8";
     
     
     // The fixture sorts the names with sv_SE and the order is verified
     // with sv_SE as well.
     TEST(coll_equal, name_fixture<sv_SE>)
     {
       sort_checker<sv_SE>::verify(names.begin(), names.end());
     }
     
     // Deliberate mismatch: the fixture sorts the names with de_DE, but the
     // order is verified with sv_SE.
     TEST(coll_mismatch, name_fixture<de_DE>)
     {
       sort_checker<sv_SE>::verify(names.begin(), names.end());
     }
     
     // Passes the command line on to crpcut::run and uses its return value
     // as the exit status of the program.
     int main(int argc, char *argv[])
     {
       return crpcut::run(argc, argv);
     }
     
     

See Fixtures for common test setups for more information on using fixtures to reduce code repetition in tests.

The result of the run is:


     PASSED: coll_equal
     info---------------------------------------------------------------------------
     Björn<=Ängla
     -------------------------------------------------------------------------------
     info---------------------------------------------------------------------------
     Ängla<=Ärlig
     -------------------------------------------------------------------------------
     info---------------------------------------------------------------------------
     Ärlig<=Örjan
     ===============================================================================
     FAILED: coll_mismatch
     info---------------------------------------------------------------------------
     Ängla<=Ärlig
     -------------------------------------------------------------------------------
     info---------------------------------------------------------------------------
     Ärlig<=Björn
     -------------------------------------------------------------------------------
     phase="running"  --------------------------------------------------------------
     /home/bjorn/devel/crpcut/doc-src/samples/collate_example.cpp:63
     ASSERT_PRED(crpcut::collate(*i, std::locale(locname)) <= *b)
     crpcut::collate(*i, std::locale(locname)) <= *b :
     Failed in locale "sv_SE.utf8"
       with left hand value = "Ärlig"
       and right hand value = "Björn"
     
     -------------------------------------------------------------------------------
     ===============================================================================
     Total 2 test cases selected
     UNTESTED : 0
     PASSED   : 1
     FAILED   : 1

Collations can be tested after translation to upper- or lower-case using a templated version of crpcut::collate. Please see crpcut::collate in the reference manual for details.