Mini Shell

Direktori : /opt/cppython/lib/python3.8/html/__pycache__/
Upload File :
Current File : //opt/cppython/lib/python3.8/html/__pycache__/parser.cpython-38.pyc

U

��eg9E�@s�dZddlZddlZddlZddlmZdgZe�d�Ze�d�Z	e�d�Z
e�d�Ze�d	�Ze�d
�Z
e�d�Ze�d�Ze�d
�Ze�dej�Ze�d
�Ze�d�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZdZdd�dd�Zdd�Zd	d
�Zdd�Zd
Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zd9dd�Zdd�Zdd�Zdd �Zd!d"�Zd#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Zd-d.�Zd/d0�Zd1d2�Zd3d4�Zd5d6�Zd7d8�Zd
S):raEFind tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )Zscript�styleT)�convert_charrefscCs||_|��dS)z�Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r�reset)�selfr�r	�*/opt/cppython/lib/python3.8/html/parser.py�__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tj�|�dS)z1Reset this instance.  Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser�rr	r	r
r`s
zHTMLParser.resetcCs|j||_|�d�dS)z�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        rN)r
�goahead�r�datar	r	r
�feedhszHTMLParser.feedcCs|�d�dS)zHandle any buffered data.�N)rrr	r	r
�closeqszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_textrr	r	r
�get_starttag_textwszHTMLParser.get_starttag_textcCs$|��|_t�d|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r�elemr	r	r
�set_cdata_mode{s
zHTMLParser.set_cdata_modecCst|_d|_dS�N)rrrrr	r	r
�clear_cdata_modeszHTMLParser.clear_cdata_modecCsX|j}d}t|�}||k�r�|jrv|jsv|�d|�}|dkr�|�dt||d��}|dkrpt�d��	||�sp�q�|}n*|j
�	||�}|r�|��}n|jr��q�|}||kr�|jr�|js�|�t
|||���n|�|||��|�||�}||kr��q�|j}|d|��rJt�||��r"|�|�}	n�|d|��r:|�|�}	nn|d|��rR|�|�}	nV|d|��rj|�|�}	n>|d	|��r�|�|�}	n&|d
|k�r�|�d�|d
}	n�q�|	dk�r<|�s��q�|�d|d
�}	|	dk�r�|�d|d
�}	|	dk�r|d
}	n|	d
7}	|j�r*|j�s*|�t
|||	���n|�|||	��|�||	�}q|d|��r�t�||�}|�r�|��d
d�}
|�|
�|��}	|d|	d
��s�|	d
}	|�||	�}qn<d||d�k�r�|�|||d
��|�||d
�}�q�q|d|��r�t�||�}|�rP|�d
�}
|�|
�|��}	|d|	d
��sB|	d
}	|�||	�}qt�||�}|�r�|�r�|��||d�k�r�|��}	|	|k�r�|}	|�||d
�}�q�n.|d
|k�r�|�d�|�||d
�}n�q�qdstd��q|�rF||k�rF|j�sF|j�r(|j�s(|�t
|||���n|�|||��|�||�}||d�|_dS)Nr�<�&�"z[\s;]�</�<!--�<?�<!rrz&#�����;zinteresting.search() lied)r
�lenrr�find�rfind�maxrr�searchr�start�handle_datarZ	updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�charref�group�handle_charref�end�	entityref�handle_entityref�
incomplete�AssertionError)rrAr
�i�n�jZampposr8r6�k�namer	r	r
r�s�
�











zHTMLParser.goaheadcCs�|j}|||d�dks"td��|||d�dkr@|�|�S|||d�dkr^|�|�S|||d���d	kr�|�d
|d�}|dkr�dS|�||d|��|dS|�|�SdS)
Nr,r+z+unexpected call to parse_html_declaration()�r)�z<![�	z	<!doctyperr-r)r
rEr;Zparse_marked_sectionrr0�handle_decl�parse_bogus_comment)rrFr
�gtposr	r	r
r=s

z!HTMLParser.parse_html_declarationrcCs`|j}|||d�dks"td��|�d|d�}|dkr>dS|rX|�||d|��|dS)Nr,)r+r(z"unexpected call to parse_comment()rr-r)r
rEr0�handle_comment)rrFZreportr
�posr	r	r
rOszHTMLParser.parse_bogus_commentcCsd|j}|||d�dks"td��t�||d�}|s:dS|��}|�||d|��|��}|S)Nr,r*zunexpected call to parse_pi()r-)r
rE�picloser3r4�	handle_pirA)rrFr
r8rHr	r	r
r<!szHTMLParser.parse_picCs�d|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|sPtd��|��}|�d���|_	}||k�r.t
�||�}|s��q.|�ddd�\}	}
}|
s�d}n\|dd�dkr�|dd�ks�n|dd�dkr�|dd�k�rnn|dd�}|�rt|�}|�|	��|f�|��}ql|||��
�}|d	k�r�|��\}
}d
|jk�r�|
|j�d
�}
t|j�|j�d
�}n|t|j�}|�|||��|S|�d��r�|�||�n"|�||�||jk�r�|�|�|S)Nrrz#unexpected call to parse_starttag()r,rL�'r-�")r�/>�
rW)r�check_for_whole_start_tagr
�tagfind_tolerantr8rErAr?rr�attrfind_tolerantr�append�stripZgetpos�countr/r1r5�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr")rrF�endposr
�attrsr8rI�tag�m�attrname�restZ	attrvaluerA�lineno�offsetr	r	r
r9-s\

&
�
�


�
zHTMLParser.parse_starttagcCs�|j}t�||�}|r�|��}|||d�}|dkr>|dS|dkr~|�d|�rZ|dS|�d|�rjdS||krv|S|dS|dkr�dS|dkr�dS||kr�|S|dStd	��dS)
Nrr�/rWr,r-rz6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r
�locatestarttagend_tolerantr8rAr6rE)rrFr
rfrH�nextr	r	r
rY`s.z$HTMLParser.check_for_whole_start_tagcCs.|j}|||d�dks"td��t�||d�}|s:dS|��}t�||�}|s�|jdk	rr|�|||��|St	�||d�}|s�|||d�dkr�|dS|�
|�S|�d���}|�
d|���}|�|�|dS|�d���}|jdk	�r||jk�r|�|||��|S|�|�|��|S)	Nr,r(zunexpected call to parse_endtagrr-rLz</>r)r
rE�	endendtagr3rA�
endtagfindr8rr5rZrOr?rr0�
handle_endtagr$)rrFr
r8rPZ	namematchZtagnamer!r	r	r
r:�s8



zHTMLParser.parse_endtagcCs|�||�|�|�dSr#)rarp�rrerdr	r	r
r`�szHTMLParser.handle_startendtagcCsdSr#r	rqr	r	r
ra�szHTMLParser.handle_starttagcCsdSr#r	)rrer	r	r
rp�szHTMLParser.handle_endtagcCsdSr#r	�rrJr	r	r
r@�szHTMLParser.handle_charrefcCsdSr#r	rrr	r	r
rC�szHTMLParser.handle_entityrefcCsdSr#r	rr	r	r
r5�szHTMLParser.handle_datacCsdSr#r	rr	r	r
rQ�szHTMLParser.handle_commentcCsdSr#r	)rZdeclr	r	r
rN�szHTMLParser.handle_declcCsdSr#r	rr	r	r
rT�szHTMLParser.handle_picCsdSr#r	rr	r	r
�unknown_decl�szHTMLParser.unknown_declcCstjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r,)�
stacklevel)�warnings�warn�DeprecationWarningr)r�sr	r	r
r�s
�zHTMLParser.unescape)r)�__name__�
__module__�__qualname__�__doc__rbrrrrrrr"r$rr=rOr<r9rYr:r`rarpr@rCr5rQrNrTrsrr	r	r	r
r?s8		z
3"()r|rrurZhtmlr�__all__rrrDrBr>r7rSZcommentcloserZr[�VERBOSErlrnrorrr	r	r	r
�<module>s,








��



Zerion Mini Shell 1.0