The Lib directory from the cPython 2.7.10 distribution. These files are unchanged and set the baseline for subsequent commits. Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Daryl McDaniel <edk2-lists@mc2research.org> git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18740 6f19259b-4bc3-4df7-8a09-765794883524
		
			
				
	
	
		
			289 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			289 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
 | |
| 
 | |
| import stringprep, re, codecs
 | |
| from unicodedata import ucd_3_2_0 as unicodedata
 | |
| 
 | |
| # IDNA section 3.1
 | |
| dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
 | |
| 
 | |
| # IDNA section 5
 | |
| ace_prefix = "xn--"
 | |
| uace_prefix = unicode(ace_prefix, "ascii")
 | |
| 
 | |
| # This assumes query strings, so AllowUnassigned is true
 | |
def nameprep(label):
    """Apply the Nameprep profile (RFC 3491) of stringprep to one label.

    The label is mapped (characters in table B.1 are dropped, every other
    character goes through table B.2), normalized to NFKC, checked
    against the prohibited-character tables and finally checked against
    the bidi requirements.  Unassigned code points are not rejected:
    labels are assumed to be query strings, so AllowUnassigned is true.

    Returns the prepared label as a unicode string.

    Raises UnicodeError if the label contains a prohibited character or
    violates one of the bidi requirements.
    """
    # Map: drop "map to nothing" characters, case-fold the rest.
    label = u"".join([stringprep.map_table_b2(c)
                      for c in label
                      if not stringprep.in_table_b1(c)])

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if (stringprep.in_table_c12(c) or
                stringprep.in_table_c22(c) or
                stringprep.in_table_c3(c) or
                stringprep.in_table_c4(c) or
                stringprep.in_table_c5(c) or
                stringprep.in_table_c6(c) or
                stringprep.in_table_c7(c) or
                stringprep.in_table_c8(c) or
                stringprep.in_table_c9(c)):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi.  The tests below look at the label as a whole, so they
    # are run once if any RandALCat character is present rather than
    # once per such character (which made this step quadratic).
    RandAL = [stringprep.in_table_d1(c) for c in label]
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(c) for c in label):
            raise UnicodeError("Violation of BIDI requirement 2")

        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label
 | |
| 
 | |
def ToASCII(label):
    """Convert a single label to its ASCII form (RFC 3490 ToASCII with
    UseSTD3ASCIIRules false).

    A label that is already ASCII is returned encoded as is; otherwise
    it is nameprepped and, if still not ASCII, punycode-encoded behind
    the ACE prefix.

    Returns the encoded label, 1 to 63 characters long.

    Raises UnicodeError if the result is empty or too long, or if a
    nameprepped non-ASCII label already starts with the ACE prefix;
    errors raised by nameprep() propagate unchanged.
    """
    encoded = None

    # Step 1: try ASCII
    try:
        encoded = label.encode("ascii")
    except UnicodeError:
        pass

    if encoded is None:
        # Step 2: nameprep
        label = nameprep(label)

        # Step 3: UseSTD3ASCIIRules is false
        # Step 4: try ASCII
        try:
            encoded = label.encode("ascii")
        except UnicodeError:
            pass

    if encoded is None:
        # Step 5: Check ACE prefix
        if label.startswith(uace_prefix):
            raise UnicodeError("Label starts with ACE prefix")

        # Step 6: Encode with PUNYCODE
        # Step 7: Prepend ACE prefix
        encoded = ace_prefix + label.encode("punycode")

    # Step 8: Check size (reached from steps 1, 4 and 7 alike)
    if 0 < len(encoded) < 64:
        return encoded
    raise UnicodeError("label empty or too long")
 | |
| 
 | |
def ToUnicode(label):
    """Convert a single label to its unicode form (RFC 3490 ToUnicode).

    A label without the ACE prefix is returned as a unicode string as
    is; one with the prefix is punycode-decoded, and the decoded value
    is accepted only if ToASCII() maps it back to the original label.

    Raises UnicodeError if the label is not ASCII even after nameprep,
    or if the decoded label does not round-trip through ToASCII().
    """
    # Step 1: Check for ASCII.  A byte string is taken to be ASCII
    # already; anything else has to prove it by encoding.
    needs_nameprep = False
    if not isinstance(label, str):
        try:
            label = label.encode("ascii")
        except UnicodeError:
            needs_nameprep = True

    if needs_nameprep:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")

    # Step 3: Check for ACE prefix
    if not label.startswith(ace_prefix):
        return unicode(label, "ascii")

    # Step 4: Remove ACE prefix
    # Step 5: Decode using PUNYCODE
    decoded = label[len(ace_prefix):].decode("punycode")

    # Step 6: Apply ToASCII
    roundtrip = ToASCII(decoded)

    # Step 7: Compare the result of step 6 with the one of step 3
    # roundtrip will already be in lower case.
    if label.lower() != roundtrip:
        raise UnicodeError("IDNA does not round-trip", label, roundtrip)

    # Step 8: return the result of step 5
    return decoded
 | |
| 
 | |
| ### Codec APIs
 | |
| 
 | |
class Codec(codecs.Codec):
    """Stateless IDNA codec operating on whole dotted names."""

    def encode(self, input, errors='strict'):
        """Encode a dotted name label by label with ToASCII().

        The name is split on the IDNA separators and re-joined with
        U+002E; one trailing separator is kept as a trailing '.'.

        Returns (encoded, len(input)).  Raises UnicodeError if errors
        is anything but 'strict'.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return "", 0

        labels = dots.split(input)
        trailing_dot = ''
        if labels and not labels[-1]:
            trailing_dot = '.'
            labels.pop()

        # Join with U+002E
        encoded = ".".join([ToASCII(label) for label in labels])
        return encoded+trailing_dot, len(input)

    def decode(self, input, errors='strict'):
        """Decode a dotted name label by label with ToUnicode().

        A unicode input is split on the IDNA separators; any other
        input is converted with str(), must be pure ASCII, and is split
        on '.' only.  One trailing separator is kept as a trailing u'.'.

        Returns (decoded, len(input)).  Raises UnicodeError if errors
        is anything but 'strict'.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return u"", 0

        if not isinstance(input, unicode):
            # Must be ASCII string
            input = str(input)
            # Result is discarded: the call only checks that the bytes
            # are ASCII and raises otherwise.
            unicode(input, "ascii")
            labels = input.split(".")
        else:
            # IDNA allows decoding to operate on Unicode strings, too.
            labels = dots.split(input)

        trailing_dot = u''
        if labels and not labels[-1]:
            trailing_dot = u'.'
            labels.pop()

        decoded = u".".join([ToUnicode(label) for label in labels])
        return decoded+trailing_dot, len(input)
 | |
| 
 | |
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that only emits complete labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode the complete labels found in *input*.

        Unless *final* is true, a last label that is not followed by a
        separator is treated as possibly unfinished: it is neither
        encoded nor counted as consumed.

        Returns (encoded, consumed), where *encoded* is a byte string
        and *consumed* is the number of input characters it covers.
        Raises UnicodeError if errors is anything but 'strict'.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return ("", 0)

        labels = dots.split(input)
        # Must be a byte string like the '.' assigned below: a u'' here
        # would turn the joined result into a unicode object whenever no
        # dot is appended.
        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for index, label in enumerate(labels):
            result.append(ToASCII(label))
            if index:
                # One separator precedes every label but the first.
                size += 1
            size += len(label)

        # Join with U+002E
        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
 | |
| 
 | |
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that only emits complete labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode the complete labels found in *input*.

        A unicode input is split on the IDNA separators; any other
        input is converted with str(), must be pure ASCII, and is split
        on '.' only.  Unless *final* is true, a last label that is not
        followed by a separator is treated as possibly unfinished: it is
        neither decoded nor counted as consumed.

        Returns (decoded, consumed), where *consumed* is the number of
        input characters covered by *decoded*.  Raises UnicodeError if
        errors is anything but 'strict'.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, unicode):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input)
            # Result is discarded: the call only checks that the bytes
            # are ASCII and raises otherwise.
            unicode(input, "ascii")
            labels = input.split(".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        result = []
        size = 0
        for index, label in enumerate(labels):
            result.append(ToUnicode(label))
            if index:
                # One separator precedes every label but the first.
                # Counting by position rather than by "size so far"
                # keeps the consumed length right when the first label
                # is empty, which ToUnicode() accepts.
                size += 1
            size += len(label)

        result = u".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
 | |
| 
 | |
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter."""
 | |
| 
 | |
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader."""
 | |
| 
 | |
| ### encodings module API
 | |
| 
 | |
def getregentry():
    """Return the codecs.CodecInfo entry describing the 'idna' codec."""
    # Separate Codec instances back the stateless encode and decode
    # callables.
    stateless_encode = Codec().encode
    stateless_decode = Codec().decode
    return codecs.CodecInfo(
        name='idna',
        encode=stateless_encode,
        decode=stateless_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
 |