/** @file
  UCS2 to UTF8 manipulation library.
  Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.
  (C) Copyright 2020 Hewlett Packard Enterprise Development LP
    SPDX-License-Identifier: BSD-2-Clause-Patent
**/
#include 
#include 
#include 
#include 
#include 
#include 
/**
  Determine how many UTF8 bytes encode the UCS2 character that starts at the
  given buffer position, based solely on the first (lead) byte.

  Only the high bits of the lead byte are inspected; the following bytes are
  not validated here.

  @param[in]   Utf8Buffer       Pointer to the lead byte of a UTF8 sequence.

  @retval      0                The lead byte has its top four bits set
                                (1111xxxx), which is not a UCS2 encoding.
  @retval      3                The lead byte has the form 1110xxxx.
  @retval      2                The lead byte has the form 110xxxxx.
  @retval      1                Any other lead byte. This includes bytes of
                                the form 10xxxxxx.
**/
UINT8
GetUTF8SizeForUCS2 (
  IN    CHAR8  *Utf8Buffer
  )
{
  CHAR8  LeadByte;

  ASSERT (Utf8Buffer != NULL);

  LeadByte = *Utf8Buffer;

  //
  // Test the longest prefix first; each shorter prefix is implied by the
  // longer ones, so the first match decides the size.
  //
  if ((LeadByte & 0xF0) == 0xF0) {
    //
    // 1111xxxx: this format is not for UCS2.
    //
    return 0;
  }

  if ((LeadByte & 0xE0) == 0xE0) {
    return 3;
  }

  if ((LeadByte & 0xC0) == 0xC0) {
    return 2;
  }

  return 1;
}
/**
  Decode a UCS2 character written in the six-byte textual form \uXXXX, where
  XXXX are four hexadecimal digits (upper or lower case).

  The first two bytes of the buffer (the "\u" prefix) are skipped without being
  examined; the caller is expected to have matched them already.
  Caller MUST make sure there are at least 6 bytes in the input UTF8 buffer.

  @param[in]    Utf8Buffer             The buffer holding the \uXXXX text.
  @param[out]   Ucs2Char               The converted UCS2 character. Left
                                       untouched when an error is returned.

  @retval       EFI_INVALID_PARAMETER  One of the four characters following the
                                       prefix is not a hexadecimal digit, so the
                                       text can't be converted to a UCS2
                                       character.
  @retval       EFI_SUCCESS            The UCS2 character has been retrieved.
**/
EFI_STATUS
GetUCS2CharByFormat (
  IN    CHAR8   *Utf8Buffer,
  OUT   CHAR16  *Ucs2Char
  )
{
  UINT8   Index;
  UINT8   Digit;
  CHAR8   HexChar;
  UINT16  Value;

  ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);

  Value = 0;

  //
  // The four hexadecimal digits start at offset 2, right after "\u".
  // Every digit is validated explicitly: accepting arbitrary ASCII here would
  // let text such as "\uzzzz" decode to 0x0000 and inject a NUL terminator
  // into the middle of the converted string.
  //
  for (Index = 0; Index < 4; Index++) {
    HexChar = *(Utf8Buffer + 2 + Index);

    if ((HexChar >= '0') && (HexChar <= '9')) {
      Digit = (UINT8)(HexChar - '0');
    } else if ((HexChar >= 'a') && (HexChar <= 'f')) {
      Digit = (UINT8)(HexChar - 'a' + 10);
    } else if ((HexChar >= 'A') && (HexChar <= 'F')) {
      Digit = (UINT8)(HexChar - 'A' + 10);
    } else {
      return EFI_INVALID_PARAMETER;
    }

    //
    // Most significant digit comes first in the text.
    //
    Value = (UINT16)((Value << 4) | Digit);
  }

  //
  // Store by value so the result does not depend on the byte order in memory.
  //
  *Ucs2Char = (CHAR16)Value;

  return EFI_SUCCESS;
}
/**
  Encode a single UCS2 character as UTF8.

  The number of bytes written depends on the character value:
    0x0000 - 0x007F : 1 byte   0xxxxxxx
    0x0080 - 0x07FF : 2 bytes  110xxxxx 10xxxxxx
    0x0800 - 0xFFFF : 3 bytes  1110xxxx 10xxxxxx 10xxxxxx

  No terminator is written; only the returned number of bytes is stored.

  @param[in]    Ucs2Char               The provided UCS2 character.
  @param[out]   Utf8Buffer             Receives the UTF8 encoded bytes. Must
                                       have room for up to 3 bytes.

  @retval      Return the size of UTF8 encoding data for this UCS2 character
               (1, 2 or 3).
**/
UINT8
UCS2CharToUTF8 (
  IN  CHAR16  Ucs2Char,
  OUT CHAR8   *Utf8Buffer
  )
{
  UINT16  CodePoint;

  ASSERT (Utf8Buffer != NULL);

  CodePoint = (UINT16)Ucs2Char;

  if (CodePoint < 0x0080) {
    //
    // Seven significant bits: 0xxxxxxx
    //
    Utf8Buffer[0] = (CHAR8)(CodePoint & 0x7F);
    return 1;
  }

  if (CodePoint < 0x0800) {
    //
    // Eleven significant bits: 110xxxxx 10xxxxxx
    //
    Utf8Buffer[0] = (CHAR8)(0xC0 | ((CodePoint >> 6) & 0x1F));
    Utf8Buffer[1] = (CHAR8)(0x80 | (CodePoint & 0x3F));
    return 2;
  }

  //
  // Everything else (0x0800 - 0xFFFF): 1110xxxx 10xxxxxx 10xxxxxx
  //
  Utf8Buffer[0] = (CHAR8)(0xE0 | ((CodePoint >> 12) & 0x0F));
  Utf8Buffer[1] = (CHAR8)(0x80 | ((CodePoint >> 6) & 0x3F));
  Utf8Buffer[2] = (CHAR8)(0x80 | (CodePoint & 0x3F));
  return 3;
}
/**
  Decode one UTF8 sequence (1 to 3 bytes) into a UCS2 character.

  The sequence length is taken from the lead byte via GetUTF8SizeForUCS2().
  The lead byte and every continuation byte are then checked against the
  expected bit pattern before the payload bits are combined.

  The caller must make sure the buffer holds as many bytes as the lead byte
  announces; this function reads them without a length check.

  NOTE(review): overlong encodings (for example 0xC0 0x80) are not rejected;
  the payload bits are combined as-is.

  @param[in]    Utf8Buffer             The provided UTF8 encoded data.
  @param[out]   Ucs2Char               The converted UCS2 character. Set to 0
                                       when an error is returned.

  @retval       EFI_INVALID_PARAMETER  The UTF8 encoded string is not valid or
                                       not for UCS2 character.
  @retval       EFI_SUCCESS            The converted UCS2 character.
**/
EFI_STATUS
UTF8ToUCS2Char (
  IN   CHAR8   *Utf8Buffer,
  OUT  CHAR16  *Ucs2Char
  )
{
  UINT8  Utf8Size;
  UINT8  Byte1;
  UINT8  Byte2;
  UINT8  Byte3;

  ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);

  *Ucs2Char = 0;

  //
  // The bytes are handled as UINT8 on purpose: multi-byte UTF8 sequences have
  // the top bit set, which makes them negative where CHAR8 is signed, and
  // left-shifting a negative value is undefined behavior in C.
  //
  Utf8Size = GetUTF8SizeForUCS2 (Utf8Buffer);
  switch (Utf8Size) {
    case 1:
      //
      // UTF8 format: 0xxxxxxx
      // A size of 1 is also reported for 10xxxxxx lead bytes, so the top bit
      // still has to be checked here.
      //
      Byte1 = (UINT8)Utf8Buffer[0];
      if ((Byte1 & 0x80) != 0x00) {
        return EFI_INVALID_PARAMETER;
      }

      *Ucs2Char = (CHAR16)Byte1;
      break;

    case 2:
      //
      // UTF8 format: 110xxxxx 10xxxxxx
      //
      Byte1 = (UINT8)Utf8Buffer[0];
      if ((Byte1 & 0xE0) != 0xC0) {
        return EFI_INVALID_PARAMETER;
      }

      Byte2 = (UINT8)Utf8Buffer[1];
      if ((Byte2 & 0xC0) != 0x80) {
        return EFI_INVALID_PARAMETER;
      }

      *Ucs2Char = (CHAR16)(((Byte1 & 0x1F) << 6) | (Byte2 & 0x3F));
      break;

    case 3:
      //
      // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
      //
      Byte1 = (UINT8)Utf8Buffer[0];
      if ((Byte1 & 0xF0) != 0xE0) {
        return EFI_INVALID_PARAMETER;
      }

      Byte2 = (UINT8)Utf8Buffer[1];
      if ((Byte2 & 0xC0) != 0x80) {
        return EFI_INVALID_PARAMETER;
      }

      Byte3 = (UINT8)Utf8Buffer[2];
      if ((Byte3 & 0xC0) != 0x80) {
        return EFI_INVALID_PARAMETER;
      }

      *Ucs2Char = (CHAR16)(((Byte1 & 0x0F) << 12) |
                           ((Byte2 & 0x3F) << 6) |
                           (Byte3 & 0x3F));
      break;

    default:
      //
      // Size 0: the lead byte announces a sequence that does not fit in UCS2.
      //
      return EFI_INVALID_PARAMETER;
  }

  return EFI_SUCCESS;
}
/**
  Convert a NUL-terminated UCS2 string into a newly allocated, NUL-terminated
  UTF8 string.

  The conversion runs in two passes over the input: the first pass adds up the
  encoded size of every character so the output can be allocated exactly, the
  second pass writes the encoded bytes.

  @param[in]    Ucs2Str                The provided UCS2 string.
  @param[out]   Utf8StrAddr            Receives the address of the converted
                                       UTF8 string. Caller is responsible for
                                       freeing this string. Not modified when
                                       an error is returned.

  @retval       EFI_INVALID_PARAMETER  One or more parameters are invalid.
  @retval       EFI_OUT_OF_RESOURCES   System runs out of resources.
  @retval       EFI_SUCCESS            The UTF8 encoded string has been converted.
**/
EFI_STATUS
UCS2StrToUTF8 (
  IN  CHAR16  *Ucs2Str,
  OUT CHAR8   **Utf8StrAddr
  )
{
  UINTN  CharCount;
  UINTN  CharIndex;
  UINTN  EncodedLength;
  UINTN  WriteOffset;
  CHAR8  *Encoded;
  CHAR8  Scratch[UTF8_BUFFER_FOR_UCS2_MAX_SIZE];

  if ((Utf8StrAddr == NULL) || (Ucs2Str == NULL)) {
    return EFI_INVALID_PARAMETER;
  }

  CharCount = StrLen (Ucs2Str);

  //
  // Pass 1: measure. The encoded bytes land in a scratch buffer and are
  // discarded; only the per-character size matters here.
  //
  EncodedLength = 0;
  for (CharIndex = 0; CharIndex < CharCount; CharIndex++) {
    EncodedLength += UCS2CharToUTF8 (Ucs2Str[CharIndex], Scratch);
  }

  //
  // One extra byte for the terminating NUL.
  //
  Encoded = AllocateZeroPool (EncodedLength + 1);
  if (Encoded == NULL) {
    return EFI_OUT_OF_RESOURCES;
  }

  //
  // Pass 2: encode each character straight into its final position.
  //
  WriteOffset = 0;
  for (CharIndex = 0; CharIndex < CharCount; CharIndex++) {
    WriteOffset += UCS2CharToUTF8 (Ucs2Str[CharIndex], Encoded + WriteOffset);
  }

  Encoded[WriteOffset] = '\0';
  *Utf8StrAddr         = Encoded;

  return EFI_SUCCESS;
}
/**
  Convert a NUL-terminated UTF8 encoded string into a newly allocated,
  NUL-terminated UCS2 string.

  Besides regular UTF8 sequences, the six-byte textual form \uXXXX is
  recognized and converted to the single UCS2 character it names. If the text
  after "\u" cannot be decoded, the two characters "\u" are copied literally
  and conversion continues with the byte that follows them.

  @param[in]    Utf8Str                The provided UTF8 encoded string.
  @param[out]   Ucs2StrAddr            Receives the address of the converted
                                       UCS2 string. Caller is responsible for
                                       freeing this string.

  @retval       EFI_INVALID_PARAMETER  The UTF8 encoded string is not valid to
                                       convert to UCS2 string.
                                       One or more parameters are invalid.
  @retval       EFI_OUT_OF_RESOURCES   System runs out of resources.
  @retval       EFI_SUCCESS            The UCS2 string has been converted.
**/
EFI_STATUS
UTF8StrToUCS2 (
  IN  CHAR8   *Utf8Str,
  OUT CHAR16  **Ucs2StrAddr
  )
{
  EFI_STATUS  Status;
  UINTN       Utf8StrIndex;
  UINTN       Utf8StrLength;
  UINTN       Ucs2StrIndex;
  UINT8       Utf8BufferSize;
  CHAR16      *Ucs2StrTemp;

  if ((Utf8Str == NULL) || (Ucs2StrAddr == NULL)) {
    return EFI_INVALID_PARAMETER;
  }

  //
  // It is not an Ascii string, calculate string length.
  //
  Utf8StrLength = 0;
  while (*(Utf8Str + Utf8StrLength) != '\0') {
    Utf8StrLength++;
  }

  //
  // UCS2 string shall not be longer than the UTF8 string, so a working buffer
  // with one CHAR16 per input byte (plus terminator) is always large enough.
  //
  Ucs2StrTemp = AllocateZeroPool ((Utf8StrLength + 1) * sizeof (CHAR16));
  if (Ucs2StrTemp == NULL) {
    return EFI_OUT_OF_RESOURCES;
  }

  Utf8StrIndex = 0;
  Ucs2StrIndex = 0;
  while (Utf8Str[Utf8StrIndex] != '\0') {
    if ((CompareMem (Utf8Str + Utf8StrIndex, "\\u", 2) == 0) &&
        (Utf8StrLength - Utf8StrIndex >= UNICODE_FORMAT_LEN))
    {
      //
      // Textual \uXXXX form with enough bytes left to hold it.
      //
      Status = GetUCS2CharByFormat (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
      if (!EFI_ERROR (Status)) {
        Utf8StrIndex += UNICODE_FORMAT_LEN;
        Ucs2StrIndex++;
      } else {
        //
        // Not a decodable escape: keep the "\u" prefix literally and let the
        // following bytes be converted as ordinary UTF8.
        //
        Ucs2StrTemp[Ucs2StrIndex]     = L'\\';
        Ucs2StrTemp[Ucs2StrIndex + 1] = L'u';
        Ucs2StrIndex                 += 2;
        Utf8StrIndex                 += 2;
      }
    } else {
      //
      // Regular UTF8 sequence. Reject it if the lead byte is not usable for
      // UCS2 or if the string ends before the announced sequence does.
      //
      Utf8BufferSize = GetUTF8SizeForUCS2 (Utf8Str + Utf8StrIndex);
      if ((Utf8BufferSize == 0) || (Utf8StrLength - Utf8StrIndex < Utf8BufferSize)) {
        FreePool (Ucs2StrTemp);
        return EFI_INVALID_PARAMETER;
      }

      Status = UTF8ToUCS2Char (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
      if (EFI_ERROR (Status)) {
        FreePool (Ucs2StrTemp);
        return EFI_INVALID_PARAMETER;
      }

      Ucs2StrIndex++;
      Utf8StrIndex += Utf8BufferSize;
    }
  }

  //
  // Hand back a buffer sized to the actual number of converted characters.
  //
  *Ucs2StrAddr = AllocateZeroPool ((Ucs2StrIndex + 1) * sizeof (CHAR16));
  if (*Ucs2StrAddr == NULL) {
    FreePool (Ucs2StrTemp);
    return EFI_OUT_OF_RESOURCES;
  }

  //
  // Copy by explicit length rather than with a string-copy routine: the number
  // of converted characters is already known, and a bounded string copy could
  // fail on very long strings (its status would have to be handled) or stop
  // early at a NUL produced by the conversion.
  //
  CopyMem (*Ucs2StrAddr, Ucs2StrTemp, Ucs2StrIndex * sizeof (CHAR16));
  *(*Ucs2StrAddr + Ucs2StrIndex) = L'\0';

  FreePool (Ucs2StrTemp);

  return EFI_SUCCESS;
}