Liquid UI: Validate Japanese Half-Width(Hankaku) Katakana Character
This example creates logic to validate whether the user input contains any Half-width (Hankaku) Katakana characters.
In Unicode, the Half-width Katakana characters occupy a specific range (U+FF61 to U+FF9F).
The logic converts each character to its Unicode value and then checks whether any result falls within the range of Half-width Katakana.
Step 1: Create user interface
//User interface
//Toolbar pushbuttons to remove from the screen, in removal order
var toolbar_buttons_to_remove = [
    "P[User menu]",
    "P[SAP menu]",
    "P[SAP Business Workplace]",
    "P[Other menu]",
    "P[Add to Favorites]",
    "P[Delete Favorites]",
    "P[Change Favorites]",
    "P[Move Favorites down]",
    "P[Move Favorites up]",
    "P[Create role]",
    "P[Assign users]",
    "P[Documentation]"
];
//Delete all existing pushbuttons on toolbar
for(var btn_idx=0; btn_idx<toolbar_buttons_to_remove.length; btn_idx++){
    del(toolbar_buttons_to_remove[btn_idx]);
}
//Start from an empty screen before adding the custom controls
clearscreen();
//Input field whose value is read as z_input by the validation function
inputfield([2,2], "Input Data", [2,20], {"name":"z_input", "size":40});
//Toolbar button that runs the validation function when pressed
pushbutton([TOOLBAR], "Validate Input", "?", {"process":validateHalfWidthKatakana});
Step 2: Create function to validate the converted unicode result
//Function to validate the converted unicode result
function validateHalfWidthKatakana(){
onscreen "*"
var splited_ary = getSplitedTextAry(z_input); //Logic to split the input data into an array by actual characters
var regular_char_counter = 0;
var half_width_char_counter = 0;
var full_width_char_counter = 0;
var other_char_counter = 0;
var cur_unicode = "";
//Logic to calculate how many Half-Width Katakana/regular character/number/syntax in the input data
for(var k=0; k<splited_ary.length; k++){
if(splited_ary[k].length > 1){
cur_unicode = getUnicode(splited_ary[k]); //Logic to get Unicode for each character
if( (cur_unicode >= "3000" && cur_unicode <= "30ff") || //Punctuation, Hiragana, Katakana
(cur_unicode >= "FF00" && cur_unicode <= "FF9F") || //Full-width Roman, Half-width Katakana
(cur_unicode >= "4E00" && cur_unicode <= "9FAF") || //CJK (Common & Uncommon)
(cur_unicode >= "3400" && cur_unicode <= "4DBF")){ //CJK Ext. A (Rare)
if(cur_unicode >= "FF61" && cur_unicode <= "FF9F"){ //It's a Half width katakana (Hankaku)
half_width_char_counter++;
} else{
//It's either Punctuation, Hiragana, Katakana, Full-width Roman, CJK, or CJK Ext. A
full_width_char_counter++;
}
} else {
other_char_counter++; //Unhandled characters
}
} else {
regular_char_counter++; //It's regular character/number/syntax
}
}
println("=====>> Half-Width count: "+half_width_char_counter);
println("=====>> Full-Width count: "+full_width_char_counter);
message("S:Input Data contain " + half_width_char_counter + " Half-Width Katakana");
enter("?");
}
Step 3: Create function to split the string based on actual characters
//Function to return an array with splitted text
/**
 * Splits the input into an array with one element per actual character.
 *
 * The string is run through encodeURI() and walked in parallel with the
 * original text:
 *  - unescaped characters are pushed one by one;
 *  - an escape below %80 is a single ASCII character that encodeURI escapes
 *    (space, "%", quotes, ...) and is pushed as ONE element. Without this
 *    check it would be mistaken for the start of a multi-byte character and
 *    merged with the characters that follow it;
 *  - any other escape starts a multi-byte character, taken as 3 chars of the
 *    original text (18 chars of the encoded text). Only 3-byte UTF-8
 *    characters are supported.
 *
 * @param {string} str Text to split; null/undefined is treated as empty.
 * @returns {Array} Single characters, plus 3-char elements for multi-byte characters.
 */
function getSplitedTextAry(str){
    var result_ary = [];
    var ref_str = (str === undefined || str === null) ? "" : String(str);
    var converted_str = encodeURI(ref_str); //Converted the string becomes encoded result
    var pct_pos;
    var j;
    //Loop until the converted string becomes nothing
    while(converted_str.length > 0){
        pct_pos = converted_str.indexOf("%");
        //No escape left: everything remaining is regular single characters
        if(pct_pos < 0){
            for(j=0; j<ref_str.length; j++){
                result_ary.push(ref_str.charAt(j));
            }
            break;
        }
        //Regular characters before the next escape map 1:1 onto the original text
        if(pct_pos > 0){
            for(j=0; j<pct_pos; j++){
                result_ary.push(ref_str.charAt(j));
            }
            ref_str = ref_str.slice(pct_pos);
            converted_str = converted_str.slice(pct_pos);
            continue;
        }
        //Escape at the first position
        if(parseInt(converted_str.slice(1,3), 16) < 0x80){
            //Escaped ASCII character: "%XX" in the encoded text, 1 char in the original
            result_ary.push(ref_str.charAt(0));
            converted_str = converted_str.slice(3);
            ref_str = ref_str.slice(1);
        } else {
            //Multi-byte character: 18 chars after encoding, 3 chars before encoding
            result_ary.push(ref_str.slice(0,3));
            converted_str = converted_str.slice(18);
            ref_str = ref_str.slice(3);
        }
    }
    return result_ary;
}
Note: This logic is required to recognize each individual actual character in the string.
Here we only consider the 3-byte characters for UTF-8 encoding.
Step 4: Create function to convert a multi-byte character to its Unicode value
//Function to return Unicode in string
/**
 * Converts one UTF-8 encoded character, passed as a string with one char per
 * byte, into its Unicode value.
 *
 * Only 3-byte sequences (U+0800 - U+FFFF, 1110xxxx 10xxxxxx 10xxxxxx) are
 * decoded; any other length returns "".
 *
 * @param {string} str The bytes of a single character, one char per byte.
 * @returns {string} Upper-case hex value zero-padded to 4 digits (e.g. "30A2",
 *                   "0900"), or "" when the length is not 3. The padding keeps
 *                   string range comparisons by callers valid.
 */
function getUnicode(str){
    var byte_ary = [];
    var ucode = "";
    var hex = "";
    var byte_val;
    //Logic to get the actual byte value for each unencoded character
    for(var k=0; k<str.length; k++){
        //Keep only the low 8 bits, so the byte is found whether the char code
        //is a plain 0-255 value or a wider value carrying the byte in its low bits
        byte_val = str.charCodeAt(k) & 0xFF;
        println("===>>"+byte_val.toString(2)+"<==");
        byte_ary.push(byte_val);
    }
    //Logic to form multi-byte data become 16-bit hex value
    //1, 2, 4, 5 and 6 byte sequences are not handled and leave ucode as ""
    if(byte_ary.length === 3){
        //xxxx from the first byte, then six payload bits from each following byte
        ucode = ((byte_ary[0] & 0x0F) << 12) |
                ((byte_ary[1] & 0x3F) << 6) |
                (byte_ary[2] & 0x3F);
    }
    if(ucode !== ""){
        hex = ucode.toString(16).toUpperCase();
        while(hex.length < 4){
            hex = "0" + hex;
        }
    }
    println("===>>Unicode="+hex+"<==");
    return hex; //Return Unicode value in hex
}
Note: Unicode conversion should include 6 different cases.
Here we only consider the 3-byte characters for UTF-8 encoding.
See attachments for code samples