the Compartmented Robust Posix C++ Unit Test system

Collating strings using a locale

Collating strings means comparing their sort order according to the rules of a locale. The result of a comparison may differ depending on the locale used.

With crpcut it is easy to make the comparison. The result of crpcut::collate can be compared against a string using the comparison operators <, <=, !=, ==, >=, and > inside the ASSERT_TRUE(expr), VERIFY_TRUE(expr), ASSERT_FALSE(expr) or VERIFY_FALSE(expr) macro.

[Note]Note
Operator == in a collation check does not mean strict equality, but rather that the left hand side and right hand side are equivalent in the sort order of the locale.

Consider this class that can represent a range of names sorted in a desired locale:

     
     #include <algorithm>
     #include <locale>
     #include <set>
     #include <string>
     
     /// A collection of names kept in the sort order of a locale.
     ///
     /// @tparam locname reference to a pointer variable holding the locale
     ///                 name that is passed to the std::locale constructor.
     template <const char *(&locname)>
     class sorted_names
     {
       /// Ordering predicate for the underlying multiset: lh sorts before rh
       /// when the std::collate<char> facet of the locale says so.
       class comparator
       {
         typedef std::collate<char> coll_t;
       public:
         /// Constructs the locale named by locname. std::locale throws
         /// std::runtime_error when the name does not denote a usable locale.
         comparator() : loc(locname) {}
         bool operator()(const std::string &lh, const std::string &rh) const
         {
           const coll_t &coll = std::use_facet<coll_t>(loc);
           // compare() yields a negative value when lh collates before rh.
           return coll.compare(lh.c_str(), lh.c_str()+lh.length(),
                               rh.c_str(), rh.c_str()+rh.length()) < 0;
         }
         std::locale loc;
       };
       // A multiset, so names that compare equivalent are all retained.
       typedef std::multiset<std::string, comparator> collection;
     public:
       typedef typename collection::const_iterator iterator;
       /// Adds a name; it is placed according to the locale's collation order.
       void push(std::string name)
       {
         names.insert(name);
       }
       /// @return iterator to the first name in collation order.
       iterator begin() const
       {
         return names.begin();
       }
       /// @return iterator one past the last name. Const, like begin(), so
       ///         that a const sorted_names can be traversed as well.
       iterator end() const
       {
         return names.end();
       }
     private:
       collection names;
     };
     

Working with locales in C++ is a headache, but writing the test program with crpcut isn't too bad:

     
     #include <crpcut.hpp>
     #include "sorted-names.hpp"
     
     // The letters å, ä, ö, Å, Ä and Ö spelled out as UTF-8 byte sequences,
     // meant to be concatenated with adjacent string literals.
     #define aring "\xc3\xa5"
     #define auml  "\xc3\xa4"
     #define ouml  "\xc3\xb6"
     #define Aring "\xc3\x85"
     #define Auml  "\xc3\x84"
     #define Ouml  "\xc3\x96"
     
     template <const char *(&locname)>
     class name_fixture
     {
     protected:
       name_fixture()
       {
         names.push(Auml "ngla");
         names.push(Ouml "rjan");
         names.push(Auml "rlig");
         names.push("Bj" ouml "rn");
       }
       sorted_names<locname> names;
     };
     
     /// Checks that a range of strings is in non-descending collation order
     /// for the locale named by locname.
     template <const char *(&locname)>
     class sort_checker
     {
     public:
       /// For every adjacent pair in [b, e), logs the pair with INFO and
       /// asserts that the earlier element collates less than or equal to
       /// the later one.
       ///
       /// @param b iterator to the first element of the range
       /// @param e iterator one past the last element of the range
       template <typename iter>
       static void verify(iter b, iter e)
       {
         // An empty range has no pairs to check, and stepping b past e
         // would be undefined behaviour.
         if (b == e) return;
         iter i = b++;
         while (b != e)
           {
             INFO << *i << "<=" << *b;
             ASSERT_TRUE(crpcut::collate(*i, std::locale(locname)) <= *b);
             i = b++;
           }
       }
     };
     
     // Locale names used as template arguments. They are non-const pointer
     // variables at namespace scope because the templates take their
     // parameter as a reference to 'const char *'.
     const char *sv_SE = "sv_SE.utf8";
     const char *de_DE = "de_DE.utf8";
     
     
     // The fixture sorts the names with the sv_SE locale and the checker
     // verifies the order with the same locale.
     TEST(coll_equal, name_fixture<sv_SE>)
     {
       sort_checker<sv_SE>::verify(names.begin(), names.end());
     }
     
     // Deliberate mismatch: the fixture sorts the names with the de_DE locale
     // while the checker verifies the order with sv_SE, to show what a failed
     // collation check looks like.
     TEST(coll_mismatch, name_fixture<de_DE>)
     {
       sort_checker<sv_SE>::verify(names.begin(), names.end());
     }
     
     int main(int argc, char *argv[])
     {
       // Hand the command line to crpcut; its result is the exit status.
       const int status = crpcut::run(argc, argv);
       return status;
     }
     
     

See Fixtures for common test setups for more information on using fixtures to reduce code repetition in tests.

The result of the run is:


     PASSED!: coll_equal
     info---------------------------------------------------------------------------
     samples/collate_example.cpp:62
     Björn<=Ängla
     -------------------------------------------------------------------------------
     info---------------------------------------------------------------------------
     samples/collate_example.cpp:62
     Ängla<=Ärlig
     -------------------------------------------------------------------------------
     info---------------------------------------------------------------------------
     samples/collate_example.cpp:62
     Ärlig<=Örjan
     ===============================================================================
     FAILED!: coll_mismatch
     info---------------------------------------------------------------------------
     samples/collate_example.cpp:62
     Ängla<=Ärlig
     -------------------------------------------------------------------------------
     info---------------------------------------------------------------------------
     samples/collate_example.cpp:62
     Ärlig<=Björn
     -------------------------------------------------------------------------------
     phase="running"  --------------------------------------------------------------
     samples/collate_example.cpp:63
     ASSERT_TRUE(crpcut::collate(*i, std::locale(locname)) <= *b)
       is evaluated as:
         Failed in locale "sv_SE.utf8"
       with left hand value = "Ärlig"
       and right hand value = "Björn"
     -------------------------------------------------------------------------------
     ===============================================================================
     2 test cases selected
     
                    Sum   Critical   Non-critical
     PASSED   :       1          1              0
     FAILED   :       1          1              0

Collations can be tested after translation to upper- or lower-case using a templated version of crpcut::collate. Please see crpcut::collate in the reference manual for details.