o
    %e&                  	   @   s,  d Z ddlZddlmZ ddlmZ d ZZdZi Z	e
djZe
djZd	Zi Zzed
ZW n eefyC   eejd ZY nw ee D ]!Zejdkrkejdd Zeee< edd Zeevrkeee< qJdd eD Zdd ZG dd dZ G dd de!Z"G dd dZ#G dd dZ$dS )zY

Pyphen
======

Pure Python module to hyphenate text, inspired by Ruby's Text::Hyphen.

    N)	resources)Pathz0.14.0)Pyphen	LANGUAGESlanguage_fallbackz\^{2}([0-9a-f]{2})z
(\d?)(\D?))%#LEFTHYPHENMINRIGHTHYPHENMINCOMPOUNDLEFTHYPHENMINCOMPOUNDRIGHTHYPHENMINzpyphen.dictionariesdictionariesz.dic   _c                 C   s   i | ]}|  |qS  )lower).0namer   r   B/var/www/html/venv/lib/python3.10/site-packages/pyphen/__init__.py
<dictcomp>-   s    r   c                 C   sH   |  dd d}|r"d|} | tv rt|  S |  |sdS dS )a	  Get a fallback language available in our dictionaries.

    http://www.unicode.org/reports/tr35/#Locale_Inheritance

    We use the normal truncation inheritance. This function needs aliases
    including scripts for languages with multiple regions available.

    -r   N)replacer   splitjoinLANGUAGES_LOWERCASEpop)languagepartsr   r   r   r   0   s   	
r   c                   @       e Zd ZdZdd Zdd ZdS )AlternativeParserzParser of nonstandard hyphen pattern alternative.

    The instance returns a special int with data about the current position in
    the pattern when called with an odd value.

    c                 C   sP   | d}|d | _t|d | _t|d | _|dr&|  jd7  _d S d S )N,r         .)r   changeintindexcut
startswith)selfpatternalternativer   r   r   __init__H   s   


zAlternativeParser.__init__c                 C   s8   |  j d8  _ t|}|d@ rt|| j| j | jfS |S )Nr"   )r'   r&   DataIntr%   r(   )r*   valuer   r   r   __call__P   s
   zAlternativeParser.__call__N)__name__
__module____qualname____doc__r-   r0   r   r   r   r   r    A   s    r    c                   @   s   e Zd ZdZdddZdS )r.   zE``int`` with some other data can be stuck to in a ``data`` attribute.Nc                 C   s0   t | |}|rt|tr|j|_|S ||_|S )zCreate a new ``DataInt``.

        Call with ``reference=dataint_object`` to use the data from another
        ``DataInt``.

        )r&   __new__
isinstancer.   data)clsr/   r7   	referenceobjr   r   r   r5   [   s   zDataInt.__new__)NN)r1   r2   r3   r4   r5   r   r   r   r   r.   Y   s    r.   c                   @   r   )HyphDictzHyphenation patterns.c           	         sJ  i | _ |d  }| dkrd}||ddd D ]t}| }|r-|t	r.q t
dd |}d	|v rKd
|v rK|d	d\}}t|| nt t fddt|D  \}}t|dkrcq dt|}}|| sv|d7 }|| rn||d  s|d8 }||d  r||||| f| j d|< q i | _tdd | j D | _dS )zhRead a ``hyph_*.dic`` and parse its patterns.

        :param path: Path of hyph_*.dic to read

        rbzmicrosoft-cp1251cp1251
r"   Nc                 S   s   t t| ddS )Nr"      )chrr&   group)matchr   r   r   <lambda>   s    z#HyphDict.__init__.<locals>.<lambda>/=c                    s    g | ]\}}| |pd fqS )0r   )r   istringfactoryr   r   
<listcomp>   s    z%HyphDict.__init__.<locals>.<listcomp>r    c                 s   s    | ]}t |V  qd S )N)len)r   keyr   r   r   	<genexpr>   s    z$HyphDict.__init__.<locals>.<genexpr>)patternsopenreadlinedecoder   	read_textr   stripr)   ignored	parse_hexr    r&   zipparsemaxrM   r   cachemaxlen)	r*   pathencodingr+   r,   tagsvaluesstartendr   rI   r   r-   m   s<   zHyphDict.__init__c                 C   s   |  }| j|}|du rtd| d}dgt|d  }tt|d D ]@}t|| j t|d }t|d |D ]*}| j||| }|sIq:|\}	}
t||	 ||	 t|
 }t	t
|
|| ||< q:q%dd t|D  | j|< }|S )a  Get a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        E.g. for the dutch word 'lettergrepen' this method returns ``[3, 6,
        9]``.

        Each position is a ``DataInt`` with a data attribute.

        If the data attribute is not ``None``, it contains a tuple with
        information about nonstandard hyphenation at that point: ``(change,
        index, cut)``.

        change
          a string like ``'ff=f'``, that describes how hyphenation should
          take place.

        index
          where to substitute the change, counting from the current point

        cut
          how many characters to remove while substituting the nonstandard
          hyphenation

        Nr$   r   r"   c                 S   s(   g | ]\}}|d  rt |d |dqS )r#   r"   )r9   )r.   )r   rG   r9   r   r   r   rK      s
    z&HyphDict.positions.<locals>.<listcomp>)r   r[   getrM   rangeminr\   rP   slicemaprZ   	enumerate)r*   wordpointspointed_word
referencesrG   stopjr+   offsetr`   slice_r   r   r   	positions   s&   zHyphDict.positionsN)r1   r2   r3   r4   r-   rq   r   r   r   r   r;   j   s    0r;   c                   @   sB   e Zd ZdZdddZdd Zd	d
 ZdddZdddZeZ	dS )r   zEHyphenation class, with methods to hyphenate strings in various ways.Nr#   Tc                 C   sB   |st t| }|| _|| _|r|tvrt|t|< t| | _dS )a  Create an hyphenation instance for given lang or filename.

        :param filename: filename of hyph_*.dic to read
        :param lang: lang of the included dict to use if no filename is given
        :param left: minimum number of characters of the first syllabe
        :param right: minimum number of characters of the last syllabe
        :param cache: if ``True``, use cached copy of the hyphenation patterns

        N)r   r   leftrighthdcacher;   hd)r*   filenamelangrr   rs   r[   r   r   r   r-      s   
zPyphen.__init__c                    s*   t |j   fddj|D S )zGet a list of positions where the word can be hyphenated.

        :param word: unicode string of the word to hyphenate

        See also ``HyphDict.positions``. The points that are too far to the
        left or right are removed.

        c                    s*   g | ]}j |  kr krn n|qS r   )rr   )r   rG   rs   r*   r   r   rK      s   * z$Pyphen.positions.<locals>.<listcomp>)rM   rs   ru   rq   )r*   ri   r   rx   r   rq      s   	zPyphen.positionsc                 c   s    t | |D ]?}|jr:|j\}}}||7 }| r| }|d\}}|d| | |||| d  fV  q|d| ||d fV  qdS )zIterate over all hyphenation possibilities, the longest first.

        :param word: unicode string of the word to hyphenate

        rE   N)reversedrq   r7   isupperupperr   )r*   ri   positionr%   r'   r(   c1c2r   r   r   iterate   s   (zPyphen.iterater   c                 C   s@   |t |8 }| |D ]\}}t ||kr|| |f  S qdS )a  Get the longest possible first part and the last part of a word.

        :param word: unicode string of the word to hyphenate
        :param width: maximum length of the first part
        :param hyphen: unicode string used as hyphen character

        The first part has the hyphen already attached.

        Returns ``None`` if there is no hyphenation point before ``width``, or
        if the word could not be hyphenated.

        N)rM   r   )r*   ri   widthhyphenw1w2r   r   r   wrap   s   zPyphen.wrapc                 C   sv   t |}t| |D ]*}|jr/|j\}}}||7 }| r"| }|d||||| < q||| qd|S )a  Get the word as a string with all the possible hyphens inserted.

        :param word: unicode string of the word to hyphenate
        :param hyphen: unicode string used as hyphen character

        E.g. for the dutch word ``'lettergrepen'``, this method returns the
        unicode string ``'let-ter-gre-pen'``. The hyphen string to use can be
        given as the second parameter, that defaults to ``'-'``.

        rE   rL   )	listry   rq   r7   rz   r{   r   insertr   )r*   ri   r   lettersr|   r%   r'   r(   r   r   r   inserted  s   
zPyphen.inserted)NNr#   r#   T)r   )
r1   r2   r3   r4   r-   rq   r   r   r   r0   r   r   r   r   r      s    


r   )%r4   re	importlibr   pathlibr   VERSION__version____all__rt   compilesubrW   findallrY   rV   r   filesr   AttributeError	TypeError__file__parentsortediterdirr]   suffixr   r   
short_namer   r   r    r&   r.   r;   r   r   r   r   r   <module>   s<    	
c