
    T                        S r SSKrSSKrSSKJr  SSKJr  S/r\R                  " S5      r
\R                  " S5      r\R                  " S5      r\R                  " S	5      r\R                  " S
5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S\R,                  5      r\R                  " S\R,                  5      r\R                  " S\R,                  5      r\R                  " S5      r\R                  " S5      rS rS r " S S\R<                  5      rg)zA parser for HTML and XHTML.    N)unescape)html5
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z&#(?:[0-9]|[xX][0-9a-fA-F])z6&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?z	<[a-zA-Z]z
</[a-zA-Z]>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                     U R                  S5      nUR                  S5      (       a  [        U5      $ UR                  S5      (       d  USS  [        ;   a  [        U5      $ U$ )Nr   &#=   )group
startswithr   endswithhtml5_entities)matchrefs     8platform/bundledpythonunix/lib/python3.13/html/parser.py_replace_attr_charrefr   ]   sU    
++a.C
~~d} <<QRN!:}J    c                 6    [         R                  [        U 5      $ N)attr_charrefsubr   )ss    r   _unescape_attrvaluer   i   s    1155r   c                      ^  \ rS rSrSrSrSrSSS.U 4S jjrU 4S	 jrS
 r	S r
SrS rSS.S jrS rS%S jrS rS rS%S jrS&S jrS rS rS rS rS rS rS rS rS rS rS  rS! rS" r S# r!S$r"U =r#$ )'r   m   a  Find tags and other markup and call handler functions.

Usage:
    p = HTMLParser()
    p.feed(data)
    ...
    p.close()

Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag().  The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks).  If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
)scriptstylexmpiframenoembednoframes)textareatitleTF)convert_charrefs	scriptingc                Z   > [         TU ]  5         Xl        X l        U R	                  5         g)aJ  Initialize and reset this instance.

If convert_charrefs is true (the default), all character references
are automatically converted to the corresponding Unicode characters.

If *scripting* is false (the default), the content of the
``noscript`` element is parsed normally; if it's true,
it's returned as is without being parsed.
N)super__init__r$   r%   reset)selfr$   r%   	__class__s      r   r(   HTMLParser.__init__   s$     	 0"

r   c                 ~   > SU l         SU l        [        U l        SU l        SU l        SU l        [        TU ]!  5         g)z1Reset this instance.  Loses all unprocessed data. z???NT)	rawdatalasttaginteresting_normalinteresting
cdata_elem_support_cdata
_escapabler'   r)   )r*   r+   s    r   r)   HTMLParser.reset   s8    -"r   c                 N    U R                   U-   U l         U R                  S5        g)zyFeed data to the parser.

Call this as often as you want, with as little or as much text
as you want (may include '\n').
r   N)r/   goaheadr*   datas     r   feedHTMLParser.feed   s     ||d*Qr   c                 &    U R                  S5        g)zHandle any buffered data.r
   N)r8   r*   s    r   closeHTMLParser.close   s    Qr   Nc                     U R                   $ )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr>   s    r   get_starttag_textHTMLParser.get_starttag_text   s    ###r   	escapablec                   UR                  5       U l        X l        U R                  S:X  a  [        R                  " S5      U l        g U(       aZ  U R                  (       dI  [        R                  " SU R                  -  [        R                  [        R                  -  5      U l        g [        R                  " SU R                  -  [        R                  [        R                  -  5      U l        g )N	plaintextz\Zz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	lowerr3   r5   recompiler2   r$   
IGNORECASEASCII)r*   elemrF   s      r   set_cdata_modeHTMLParser.set_cdata_mode   s    **,#??k)!zz%0Dt44!zz*Dt*V*,--*@ BD  "zz*BT__*T*,--*@ BDr   c                 6    [         U l        S U l        SU l        g )NT)r1   r2   r3   r5   r>   s    r   clear_cdata_modeHTMLParser.clear_cdata_mode   s    -r   c                     Xl         g)a  Enable or disable support of the CDATA sections.
If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>".
If disabled, "<[CDATA[" starts a bogus comments which ends with ">".

This method is not called by default. Its purpose is to be called
in custom handle_starttag() and handle_endtag() methods, with
value that depends on the adjusted current node.
See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
for details.
N)r4   )r*   flags     r   _set_support_cdataHTMLParser._set_support_cdata   s
     #r   c                 2   U R                   nSn[        U5      nX4:  Ga   U R                  (       a|  U R                  (       dk  UR	                  SU5      nUS:  aR  UR                  S[        X4S-
  5      5      nUS:  a,  [        R                  " S5      R                  X&5      (       d  GOvUnOHU R                  R                  X#5      nU(       a  UR                  5       nOU R                  (       a  GO-UnX5:  aR  U R                  (       a.  U R                  (       a  U R                  [        X#U 5      5        OU R                  X#U 5        U R                  X55      nX4:X  a  GOUR                   nU" SU5      (       Ga  ["        R%                  X#5      (       a  U R'                  U5      n	OU" SU5      (       a  U R)                  U5      n	OU" SU5      (       a  U R+                  U5      n	OiU" SU5      (       a  U R-                  U5      n	OIU" S	U5      (       a  U R/                  U5      n	O)US
-   U:  d  U(       a  U R                  S5        US
-   n	OGOU	S:  Ga  U(       d  GO["        R%                  X#5      (       a  GOrU" SU5      (       aP  US-   U:X  a  U R                  S5        GOH[0        R%                  X#5      (       a  GO,U R3                  X#S-   S  5        GOU" SU5      (       aK  UnS H,  n
UR5                  XS-   5      (       d  M  U[        U
5      -  n  O   U R3                  X#S-   U 5        OU" SU5      (       a(  U R6                  (       a  U R9                  X#S-   S  5        OX#US-    R;                  5       S:X  a  U R=                  X#S-   S  5        OUU" S	U5      (       a  U R3                  X#S-   S  5        O0U" SU5      (       a  U R?                  X#S-   S  5        O[A        S5      eUn	U R                  X95      nGOU" SU5      (       a  [B        R%                  X#5      nU(       a^  URE                  5       SS nU RG                  U5        URI                  5       n	U" SU	S
-
  5      (       d  U	S
-
  n	U R                  X95      nGMu  [J        R%                  X#5      nU(       a2  U(       a)  U RG                  X#S-   S  5        U R                  X45      nGOEGOCUS-   U:  a'  U R                  S5        U R                  X3S-   5      nGOGOU" SU5      (       a  [L        R%                  X#5      nU(       a\  URE                  S
5      nU RO                  U5        URI                  5       n	U" SU	S
-
  5      (       d  U	S
-
  n	U R                  X95      nGM{  [P        R%                  X#5      nU(       a0  U(       a(  U RO                  X#S
-   S  5        U R                  X45      nO@O?US
-   U:  a&  U R                  S5        U R                  X3S
-   5      nOO S5       eX4:  a  GM   U(       ah  X4:  ac  U R                  (       a.  U R                  (       a  U R                  [        X#U 5      5        OU R                  X#U 5        U R                  X45      nX#S  U l         g )Nr   <&"   z[\t\n\r\f ;]</<!--<?<!r
      )z--!z---   	<![CDATA[   	   	<!doctypezwe should not get here!r   ;zinteresting.search() lied))r/   lenr$   r3   findrfindmaxrJ   rK   searchr2   startr5   handle_datar   	updateposr   starttagopenr   parse_starttagparse_endtagparse_commentparse_piparse_html_declaration
endtagopenhandle_commentr   r4   unknown_declrI   handle_decl	handle_piAssertionErrorcharrefr   handle_charrefendincomplete_charref	entityrefhandle_entityref
incomplete)r*   r   r/   injampposr   r   ksuffixnames               r   r8   HTMLParser.goahead   s   ,,Le$$T__LLa(q5 %]]3At=F!JJ7>>wOOA((//;AAu((T__$$Xgl%;<$$Wq\2q$Avu ++J#q!!%%g11++A.Aa(())!,A****1-Aa((a(Aa((33A6A!eq[C$$S)AAq5#))'55#D!,,q5A: ,,T2'--g99  !//!>#FA..&8F&//!<< !S[ 0 % '9 ++GaCN;#K338K8K))'A#$-8 1Q3--/;>((17#D!,,++GaCDM:#D!,,wst}5,-FGGANN1(D!$$g1 ;;=2.D''-		A%c1Q3//Eq,A*00<++GaCDM: NN10UQY $$T*qa%0AC##!3 ;;q>D))$/		A%c1Q3//Eq,A"((4--gcdm< NN10UQY $$S)qa%0A555qG eJ 15$$  'A,!78  1.q$Ar{r   c                    U R                   nX!US-    S:X  d   S5       eX!US-    S:X  a  U R                  U5      $ X!US-    S:X  aH  U R                  (       a7  UR                  SUS-   5      nUS	:  a  g
U R	                  X!S-   U 5        US-   $ X!US-    R                  5       S:X  a7  UR                  SUS-   5      nUS
:X  a  g
U R                  X!S-   U 5        US-   $ U R                  U5      $ )Nr`   r_   z+unexpected call to parse_html_declaration()rb   r]   re   rc   z]]>r   rg   rd   rf   r   r
   )r/   rt   r4   rj   ry   rI   rz   parse_bogus_comment)r*   r   r/   r   gtposs        r   rv   !HTMLParser.parse_html_declarationj  s   ,,1~% 	D )C 	D%QqS>V#%%a((qs^{*t/B/BUAaC(A1ugc1o.q5Lqs^!!#{2LLac*E{WqS/07N++A..r   c                 N   U R                   nUR                  SU5      (       d   S5       e[        R                  X1S-   5      nU(       d   [        R                  X1S-   5      nU(       d  gU(       a&  UR                  5       nU R                  X1S-   U 5        UR                  5       $ )Nr]   z"unexpected call to parse_comment()rb   rg   )	r/   r   commentcloserm   commentabruptcloser   rn   rx   r   )r*   r   reportr/   r   r   s         r   rt   HTMLParser.parse_comment  s    ,,!!&!,,R.RR,##GqS1&,,Wc:EA!Q0yy{r   c                     U R                   nX1US-    S;   d   S5       eUR                  SUS-   5      nUS:X  a  gU(       a  U R                  X1S-   U 5        US-   $ )Nr`   )r_   r\   z(unexpected call to parse_bogus_comment()r   rg   r
   )r/   rj   rx   )r*   r   r   r/   poss        r   r   HTMLParser.parse_bogus_comment  sq    ,,1~- 	I 1H 	I-ll3!$"9!C 01Qwr   c                     U R                   nX!US-    S:X  d   S5       e[        R                  X!S-   5      nU(       d  gUR                  5       nU R	                  X!S-   U 5        UR                  5       nU$ )Nr`   r^   zunexpected call to parse_pi()rg   )r/   picloserm   rn   r{   r   r*   r   r/   r   r   s        r   ru   HTMLParser.parse_pi  sn    ,,1~%F'FF%w!,KKMwsA'IIKr   c                 ,   S U l         U R                  U5      nUS:  a  U$ U R                  nX1U U l         / n[        R	                  X1S-   5      nU(       d   S5       eUR                  5       nUR                  S5      R                  5       =U l        nXb:  a  [        R	                  X65      nU(       d  OUR                  SSS5      u  pnU
(       d  S nO0US S Ss=:X  a	  USS  :X  d  O  US S Ss=:X  a	  USS  :X  a  O  OUSS nU(       a  [        U5      nUR                  U	R                  5       U45        UR                  5       nXb:  a  M  X6U R                  5       nUS	;  a  U R                  X1U 5        U$ UR                  S
5      (       a  U R                  Xt5        U$ U R!                  Xt5        XpR"                  ;   d  U R$                  (       a  US:X  d  US:X  a  U R'                  USS9  U$ XpR(                  ;   a  U R'                  USS9  U$ )Nr   r
   z#unexpected call to parse_starttag()r`   rd   'rg   ")r   />r   noscriptrH   FrE   T)rB   check_for_whole_start_tagr/   tagfind_tolerantr   r   r   rI   r0   attrfind_tolerantr   appendstripro   r   handle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr%   rO   RCDATA_CONTENT_ELEMENTS)r*   r   endposr/   attrsr   r   tagmattrnamerest	attrvaluer   s                r   rr   HTMLParser.parse_starttag  s     $//2A:M,,&0  &&w!4;;;uIIK"[[^1133sj!''3A()1a(8%HI 	2A$8)BC.82A#7237%aO	/	:	LL(..*I67A j %%'k!Wv./M<<##C/    ,222C:$5{"##C5#9  444##C4#8r   c                     U R                   n[        R                  X!S-   5      nU(       d   eUR                  5       nX$S-
     S:w  a  gU$ )Nr
   r   rg   )r/   locatetagendr   r   r   s        r   r   $HTMLParser.check_for_whole_start_tag  sG    ,,""7aC0uIIKQ3<3r   c                 .   U R                   nX!US-    S:X  d   S5       eUR                  SUS-   5      S:  a  g[        R                  X!5      (       d$  X!S-   US-    S:X  a  US-   $ U R	                  U5      $ [
        R                  X!S-   5      nU(       d   eUR                  5       nX$S-
     S:w  a  g[        R                  X!S-   5      nU(       d   eUR                  S5      R                  5       nU R                  U5        U R                  5         U$ )	Nr`   r\   zunexpected call to parse_endtagr   r   rg   rd   r
   )r/   rj   rw   r   r   r   r   r   r   rI   handle_endtagrR   )r*   r   r/   r   r   r   s         r   rs   HTMLParser.parse_endtag  s    ,,1~%H'HH%<<QqS!A%++s1Q33&s
//22""7aC0uIIKQ3<3 !&&w!4ukk!n""$3r   c                 H    U R                  X5        U R                  U5        g r   )r   r   r*   r   r   s      r   r   HTMLParser.handle_startendtag  s    S(3r   c                     g r    r   s      r   r   HTMLParser.handle_starttag	      r   c                     g r   r   )r*   r   s     r   r   HTMLParser.handle_endtag  r   r   c                     g r   r   r*   r   s     r   r~   HTMLParser.handle_charref  r   r   c                     g r   r   r   s     r   r   HTMLParser.handle_entityref  r   r   c                     g r   r   r9   s     r   ro   HTMLParser.handle_data  r   r   c                     g r   r   r9   s     r   rx   HTMLParser.handle_comment  r   r   c                     g r   r   )r*   decls     r   rz   HTMLParser.handle_decl!  r   r   c                     g r   r   r9   s     r   r{   HTMLParser.handle_pi%  r   r   c                     g r   r   r9   s     r   ry   HTMLParser.unknown_decl(  r   r   )	__starttag_textr5   r4   r3   r$   r2   r0   r/   r%   )T)r
   )$__name__
__module____qualname____firstlineno____doc__r   r   r(   r)   r;   r?   rB   rC   rO   rR   rV   r8   rv   rt   r   ru   rr   r   rs   r   r   r   r~   r   ro   rx   rz   r{   ry   __static_attributes____classcell__)r+   s   @r   r   r   m   s    0 Y3+/5   O$ 16 
B
# O#h/2		.d< 
 r   ) r   rJ   _markupbasehtmlr   html.entitiesr   r   __all__rK   r1   r   r   r}   r   r   rq   rw   r   r   r   r   VERBOSEr   r   locatestarttagend_tolerant	endendtag
endtagfindr   r   
ParserBaser   r   r   r   <module>r      si   " 
   1 . ZZ' ZZ%
JJ>?	
**@
AZZ => zzSTzz+&ZZ%

**S/zz(#ZZ'  ::QR JJ   ZZ  zz  ZZ   ZZ ) ZZ  JJsO	ZZ>?

6|'' |r   