[C++] 94 文字列からひらがなを取り出す

[M1 Mac, Big Sur 11.6.7, clang 13.0.0, NO IDE]

日本語の文字はUTF-8では1文字が複数バイトで表現されるため、バイト単位で扱うstd::stringでは文字単位の処理が難しいです。

今回は漢字とひらがなが混ざった文字列からひらがなだけを取り出しました。

#include <cppstd.h> // 自製c++標準ライブラリ群

// Table of the hiragana characters recognised by hiragana_extract().
// Each entry is a single character stored as a UTF-8 encoded string.
std::vector<std::string> hiragana_list =
    {
    "あ","い","う","え","お",
    "か","き","く","け","こ",
    "さ","し","す","せ","そ",
    "た","ち","つ","て","と",
    "な","に","ぬ","ね","の",
    "は","ひ","ふ","へ","ほ",
    "ま","み","む","め","も",
    "や","ゆ","よ",
    "ら","り","る","れ","ろ",
    "わ","を","ん",
    "が","ぎ","ぐ","げ","ご",
    "ざ","じ","ず","ぜ","ぞ",
    "だ","ぢ","づ","で","ど",
    "ば","び","ぶ","べ","ぼ",
    "ぱ","ぴ","ぷ","ぺ","ぽ",
    "ゃ","ゅ","ょ","っ",
    // Small vowels, small wa, historical kana and vu were missing before.
    "ぁ","ぃ","ぅ","ぇ","ぉ",
    "ゎ","ゐ","ゑ","ゔ"
    };

// Hiragana characters found by the most recent hiragana_extract() call,
// one character per element, in input order.
std::vector<std::string> str_kana;

// Returns the number of bytes occupied by the UTF-8 character that starts at
// text[pos] (pos must be < text.size()).
//
// Malformed input -- a stray continuation byte, an invalid lead byte, or a
// sequence that is truncated or interrupted by a non-continuation byte -- is
// reported as a single byte so that decoding resynchronises on the next
// valid character instead of swallowing it.
static std::string::size_type utf8_char_size(const std::string &text,
                                             std::string::size_type pos)
{
    const unsigned char lead = static_cast<unsigned char>(text[pos]);

    std::string::size_type expected = 1;
    if (lead < 0x80) {
        return 1;               // ASCII
    } else if (lead < 0xC0) {
        return 1;               // continuation byte without a lead byte
    } else if (lead < 0xE0) {
        expected = 2;
    } else if (lead < 0xF0) {
        expected = 3;
    } else if (lead < 0xF8) {
        expected = 4;
    } else {
        return 1;               // 0xF8..0xFF never appear in UTF-8
    }

    // Not enough bytes left for the announced length.
    if (text.size() - pos < expected) {
        return 1;
    }

    // Every trailing byte must have the bit pattern 10xxxxxx.
    for (std::string::size_type k = 1; k < expected; ++k) {
        const unsigned char trail = static_cast<unsigned char>(text[pos + k]);
        if ((trail & 0xC0) != 0x80) {
            return 1;
        }
    }

    return expected;
}

// Extracts the hiragana characters contained in a UTF-8 string.
//
// str:     UTF-8 encoded text, possibly mixing kanji, kana, ASCII, etc.
// returns: the characters of str that appear in hiragana_list, concatenated
//          in their original order (empty when there are none).
//
// Side effects:
//   * every decoded character is written to standard output on its own line;
//   * str_kana is reset and then filled with the extracted characters, so
//     repeated calls no longer accumulate results from earlier calls.
std::string hiragana_extract(std::string str)
{
    str_kana.clear();

    std::string hiragana;
    std::string::size_type pos = 0;

    while (pos < str.size()) {
        const std::string::size_type char_size = utf8_char_size(str, pos);
        const std::string moji = str.substr(pos, char_size);
        pos += char_size;

        // Per-character trace of the decoded input.
        std::cout << moji << '\n';

        if (std::find(hiragana_list.begin(), hiragana_list.end(), moji)
                != hiragana_list.end()) {
            str_kana.push_back(moji);
            hiragana += moji;
        }
    }

    return hiragana;
}

int main(int argc, char **argv)
{
    string str = "水浅葱 みずあさぎ";

    string hiragana = hiragana_extract(str);

    cout << "ひらがな抽出 " << hiragana << endl;

    return 0;
}
--------------------------------------------------
出力
--------------------------------------------------
水
浅
葱
 
み
ず
あ
さ
ぎ
ひらがな抽出 みずあさぎ

2022/7/21追記:
以下の記事にてより洗練されたコードに書き直しました。