Funciones de texto txt.tol que retornan Set

Funciones para el manejo de textos (gramática Text) que retornan un conjunto (gramática Set).

Las funciones de esta página están ordenadas alfabéticamente por las diferentes gramáticas del lenguaje Tol (Text, Set, Serie, Anything, Code, Date, Real, ...) y, dentro de cada gramática, por el nombre de la función. Pueden encontrarse dos o más funciones con idéntico nombre, pero programadas de distinta manera o con comentarios en diferentes idiomas; estas funciones aparecen unas a continuación de las otras.

Set Txt2Set() de Ediciones.aContracorriente

//////////////////////////////////////////////////////////////////////////////
// Txt2Set (two-argument variant): splits txtInp by sepTok through
// TxtTokenizer() and applies Compact() to every resulting piece.
// This variant takes no control flags: compaction is always performed.
//////////////////////////////////////////////////////////////////////////////
Set Txt2Set(Text txtInp, // Input text
            Text sepTok) // Separator token
//////////////////////////////////////////////////////////////////////////////
{
  Set setSep = TxtTokenizer(txtInp, sepTok); // Raw pieces, separator removed
  Set setCmp = EvalSet(setSep, Text(Text eleTxt) { Compact(eleTxt) });
  setCmp
};
//////////////////////////////////////////////////////////////////////////////
// The description previously documented a ctrFun argument that this variant
// does not have; it now states what the code above actually does.
PutDescription(
"Retorna un conjunto a partir de un texto txtInp, troceandolo por un separador
sepTok y compactando con Compact() cada uno de los textos resultantes.",
Txt2Set);
//////////////////////////////////////////////////////////////////////////////

Set Txt2Set() de Omr.Forms

//////////////////////////////////////////////////////////////////////////////
// Txt2Set (three-argument variant): splits txtInp by sepTok and then,
// depending on the letters present in ctrFun, applies in this order:
//   C -> Compact() every piece
//   N -> drop empty pieces
//   U -> keep unique pieces only
//   S -> sort the pieces with Compare()
// Flags are matched case-insensitively (ctrFun is upper-cased first).
//////////////////////////////////////////////////////////////////////////////
Set Txt2Set(Text txtInp, // Input text
            Text sepTok, // Separator token
            Text ctrFun) // C->Compact, N->Not null, U->Unique, S->Sort
//////////////////////////////////////////////////////////////////////////////
{
  // Upper-case the flags once; every check below must use ctrUpp so that
  // "c", "n", "u" and "s" are honoured as well as their capitals.
  Text ctrUpp = ToUpper(ctrFun);

  // Tokenizer() breaks by a single character, so the (possibly longer)
  // separator is first replaced by Char(7), assumed absent from txtInp.
  Set  setSep = Tokenizer(Replace(txtInp, sepTok, Char(7)), Char(7));

  Set  setCmp = If(!TextFind(ctrUpp, "C"), setSep,
                   EvalSet(setSep, Text(Text eleTxt) { Compact(eleTxt) }));

  Set  setNot = If(!TextFind(ctrUpp, "N"), setCmp,
                   Select(setCmp, Real(Text eleTxt) { eleTxt != "" }));

  Set  setUni = If(!TextFind(ctrUpp, "U"), setNot, Unique(setNot));

  Set  setSrt = If(!TextFind(ctrUpp, "S"), setUni,
                   Sort(setUni, Real(Text a, Text b) { Compare(a,b) }));

  setSrt
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Retorna un conjunto a partir de un texto txtInp, troceandolo por un separador
sepTok y dependiendo de ctrFun puede, en este orde, si C compactar, si N
eliminar los texto nulos, si U retornar elementos unicos y si S retornar el
set ordenado.",
Txt2Set);
//////////////////////////////////////////////////////////////////////////////

Set Txt2Set() de Ink.Watercolor

//////////////////////////////////////////////////////////////////////////////
// Txt2Set (separator-set variant): tokenizes txt by any of the separators in
// sep (each may be longer than one character). Every separator is mapped to
// the single character Char(1) with ReplaceTable() and the result is broken
// with Tokenizer(). An empty sep falls back to ";". When cmp is true every
// token goes through Compact().
//////////////////////////////////////////////////////////////////////////////
Set Txt2Set(Text txt, // Text
            Set  sep, // Set of separators
            Real cmp) // If true apply the Compact() function
//////////////////////////////////////////////////////////////////////////////
{
  Text oneChr = Char(1); // Single-character stand-in for every separator

  // Replacement table of [separator, oneChr] pairs.
  Set  repTab = If(GT(Card(sep),0),
                   EvalSet(sep, Set(Text oneSep) { [[oneSep, oneChr]] }),
                   [[ [[";", oneChr ]] ]]);

  Set  tokSet = Tokenizer(ReplaceTable(txt, repTab), oneChr);

  If(cmp, EvalSet(tokSet, Text(Text oneTok) { Compact(oneTok) }), tokSet)
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Returns a set of texts like TOL function Tokenizer().
Can use a set of separators of any length.
Assumes that there are not Char(1) inside the text.
If argument sep is the Empty set then assumes ; as default separator.
If argument cmp is true then apply the Compact() function.
For example: Txt2Set(' a / b -- c / d / e / f ', [['/','--']], TRUE)
returns [['a','b','c','d','e','f']]",
Txt2Set);
//////////////////////////////////////////////////////////////////////////////

Set TxtForChr() de con-Q.tv

//////////////////////////////////////////////////////////////////////////////
// TxtForChr: applies funChr to each one-character substring of inpTxt, from
// position 1 to TextLength(inpTxt), and returns the Set built by For().
//////////////////////////////////////////////////////////////////////////////
Set TxtForChr(Text inpTxt, // Input text
              Code funChr) // Function of the form Anything(Text oneChr)
//////////////////////////////////////////////////////////////////////////////
{
  For(1, TextLength(inpTxt), Anything(Real chrPos)
  {
    Text oneChr = Sub(inpTxt, chrPos, chrPos); // Character at chrPos
    funChr(oneChr)
  })
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Retorna el conjunto resultado de aplicar la funcion funChr(Text oneChr)
a todos los caracteres del texto de entrada txtInp.
Retorna un Set a imagen de las funciones basicas For() y EvalSet().",
TxtForChr);
//////////////////////////////////////////////////////////////////////////////

Set TxtLineWrap() de Dct.Writer

//////////////////////////////////////////////////////////////////////////////
// TxtLineWrap: cuts txtInp into a pair [first line, rest] so that the first
// line has at most linMax characters, cutting at a blank.
//  - Text not longer than linMax            -> [text, ""].
//  - A blank exists at or before linMax+1   -> cut at the last such blank.
//  - Otherwise                              -> cut at the first blank after
//                                              linMax+1 (a "bad" cut).
//  - No blank at all                        -> [text, ""].
// When cmpCtr is true the input and both results are passed through
// Compact().
//////////////////////////////////////////////////////////////////////////////
Set  TxtLineWrap(Text txtInp, // Input text
                 Real linMax, // Maximum number of characters per line
                 Real cmpCtr) // If true then compacts
//////////////////////////////////////////////////////////////////////////////
{
  Text txtCmp = If(cmpCtr, Compact(txtInp), txtInp);
  Text txtRev = Reverse(txtCmp);
  Real txtLen = TextLength(txtCmp);
  Set  cutSet = If(LE(txtLen, linMax), [[txtCmp, ""]], // Nothing to cut
  {
    // Backward search: a blank at position p of the reversed text sits at
    // position txtLen-p+1 of the original, so starting at txtLen-linMax
    // only considers blanks at original positions <= linMax+1.
    Real blkPos = TextFind(txtRev, " ", txtLen-linMax);

    If(GE(blkPos, 1),
    {
      SetOfText(Sub(txtCmp, 0,               txtLen-blkPos),
                Sub(txtCmp, txtLen-blkPos+1, txtLen))
    },
    {
      // No blank early enough: look forward for the first later blank
      Real blkBad = TextFind(txtCmp, " ", linMax+1);

      // Not-found is tested with LE(...,0), consistently with the other
      // TextFind checks in this file. The former LT(...,0) let a 0 result
      // fall into the cut branch and return ["", whole text], which makes
      // TxtParagraphWrap() recurse on the same text forever.
      If(LE(blkBad, 0), [[txtCmp, ""]], // No cut is possible
      {
        SetOfText(Sub(txtCmp, 0,        blkBad-1), // There is a bad cut
                  Sub(txtCmp, blkBad+1, txtLen))
      })
    })
  });
  If(cmpCtr, SetOfText(Compact(cutSet[1]),Compact(cutSet[2])), cutSet)
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Retorna un conjunto de 2 texto el primero con un máximo de linMax caracteres
y el segundo con el resto.
Es el resultado de cortar txtInp por el primer blanco que permita que el corte
cumpla la condición inicial.
Si el texto de entrada es mas corte que linMax retorna un conjunto formado
por el texto inicial y la tira vacia.
Si el corte es imposible busca el mejor corte posible y si no lo encuentra
retorna un conjunto formado por el texto inicial y la tira vacia.
Si cmpCtr es true los resultados son compactados.
Tambien existe en Tol la funcion Wrap() con ciertas semejanzas,
aunque mas a TxtParagraphWrap().",
TxtLineWrap);
//////////////////////////////////////////////////////////////////////////////

Set TxtParagraphWrap() de Sfk.Wrap

//////////////////////////////////////////////////////////////////////////////
// TxtParagraphWrap: wraps a whole paragraph by calling TxtLineWrap()
// recursively. Each call keeps the first line returned by TxtLineWrap() and
// recurses on the remainder until that remainder is the empty text.
//////////////////////////////////////////////////////////////////////////////
Set  TxtParagraphWrap(Text txtInp, // Input text
                      Real linMax, // Maximum number of characters per line
                      Real cmpCtr) // If true then compacts
//////////////////////////////////////////////////////////////////////////////
{
  Set  cutTwo = TxtLineWrap(txtInp, linMax, cmpCtr); // [first line, rest]
  Set  linOne = SetOfText(cutTwo[1]);

  If(cutTwo[2]!="",
     linOne << TxtParagraphWrap(cutTwo[2], linMax, cmpCtr), // More to wrap
     linOne)                                                // Last line
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Retorna un conjunto de textos resultado de aplicar recursivamente la funcion
TxtLineWrap(), lo que permite recortar párrafos.
Tambien existe en Tol la funcion Wrap() con ciertas semejanzas.",
TxtParagraphWrap);
//////////////////////////////////////////////////////////////////////////////

Set TxtSplitBy1Tag() de SHi.SyntaxHighlight

//////////////////////////////////////////////////////////////////////////////
// TxtSplitBy1Tag: splits txtInp by the single tag tagBrk using
// TxtTokenizer(). Tokens at odd positions are returned as they are; tokens
// at even positions are returned wrapped as tagBrk+token+tagBrk.
//////////////////////////////////////////////////////////////////////////////
Set TxtSplitBy1Tag(Text txtInp, // Input text
                   Text tagBrk) // Tag used to cut
//////////////////////////////////////////////////////////////////////////////
{
  Set txtTok = TxtTokenizer(txtInp, tagBrk);
  For(1, Card(txtTok), Text(Real posTok) // Loop over odd and even positions
  { If(posTok%2, txtTok[posTok], tagBrk+txtTok[posTok]+tagBrk) })
};
//////////////////////////////////////////////////////////////////////////////
// Description corrected: even occurrences close a framed text (it said
// "impares" twice) and the example output starts with aaa (it said aaav).
PutDescription(
"Retorna un conjunto de textos resultado de cortar el texto de entrada por un
unico tag tagBrk incluyendo dicho tagBrk al inicio y al final de cada texto
que enmarca, las ocurrencias impares de tagBrk al inicio del texto y
las pares al final.
Tiene mas sentido cuando el numero de ocurrencias de tagBrk es par.
Si el numero de ocurrencias de tagbrk es impar funciona como si al final del
texto txtInp hubiera una ultima ocurrencia.
Fue una funcion recursiva, pero la versión 2.0.1 de Tol se caia con textos
grandes, ahora es una funcion iterativa.
TxtSplitBy1Tag(aaa|::|bbb|---|ccc, |) -> [aaa, |::|, bbb, |---|, ccc].",
TxtSplitBy1Tag);
//////////////////////////////////////////////////////////////////////////////

Set TxtSplitBy2Fast() de SHi.SyntaxHighlight

//////////////////////////////////////////////////////////////////////////////
// TxtSplitBy2Fast: non-recursive split of txtInp by a start tag and an end
// tag, keeping the tags attached to the text they frame.
// Step 1: insert the auxiliary character Char(7) after every tagEnd and
//         tokenize by it, so each token ends right after an end tag.
// Step 2: inside every token insert Char(7) before every tagIni, tokenize
//         again and drop the empty pieces.
// Step 3: concatenate the per-token sets into one flat set.
//////////////////////////////////////////////////////////////////////////////
Set TxtSplitBy2Fast(Text txtInp, // Input text
                    Text tagIni, // Start tag used to cut
                    Text tagEnd) // End tag used to cut
//////////////////////////////////////////////////////////////////////////////
{
  Text chrBrk = Char(7); // Auxiliary cut character, expected to be unique
  Text repEnd = Replace(txtInp, tagEnd, tagEnd+chrBrk);
  Set  txtSet = Tokenizer(repEnd, chrBrk);

  Set  cicSet = EvalSet(txtSet, Set(Text txtTok)
  {

    Text repIni = Replace(txtTok, tagIni, chrBrk+tagIni);
    Set  tokSet = Tokenizer(repIni, chrBrk);
    Select(tokSet, Real(Text tokTxt) { tokTxt != "" }) // Drops empty texts
  });
  BinGroup("<<", cicSet) // From a set of sets to a flat set
};
//////////////////////////////////////////////////////////////////////////////
// Description corrected: the parameter is tagEnd (it said tabEnd) and the
// example output starts with aaa (it said aaav).
PutDescription(
"Retorna un conjunto de textos resultado de cortar el texto de entrada por
dos tags de inicio y de fin incluyendo dichos tags al inicio y al final de
cada texto que enmarcan.
Los tags tagIni y tagEnd no han de ser nulos ni iguales, si son iguales lo
correcto seria utilizar la funcion TxtSplitBy1Tag().
No es una funcion recursiva, es mas rapida que TxtSplitBy2Tag() pero menos
resistente a la reiteracion de tags de inicio con un unico final.
No funciona correctamente si los tags inicial y final son iguales.
Tiene mas sentido cuando a cada tag de inicio le corresponde uno de final.
TxtSplitBy2Fast(aaa<::>bbb<--->ccc, <, >) -> [aaa, <::>, bbb, <--->, ccc].",
TxtSplitBy2Fast);
//////////////////////////////////////////////////////////////////////////////

Set TxtSplitBy2Tag() de SHi.SyntaxHighlight

//////////////////////////////////////////////////////////////////////////////
// TxtSplitBy2Tag: recursive split of txtInp by a start tag and an end tag,
// keeping the tags attached to the text they frame.
// It locates the first tagIni and the first tagEnd after it, emits the text
// before the start tag (if any) and the framed text, and recurses on what
// follows the end tag. If no tagIni is found the input is returned whole.
//////////////////////////////////////////////////////////////////////////////
Set TxtSplitBy2Tag(Text txtInp, // Input text
                   Text tagIni, // Start tag used to cut
                   Text tagEnd) // End tag used to cut
//////////////////////////////////////////////////////////////////////////////
{
  Real posIni = TextFind(txtInp, tagIni);
  If(LE(posIni,0), SetOfText(txtInp), // Nothing to cut
  {
    Real lenTxt = TextLength(txtInp); // Length of the input text
    Real lenIni = TextLength(tagIni); // Length of the start tag
    Real lenEnd = TextLength(tagEnd); // Length of the end tag
    Real posSub = posIni + lenIni;    // First position after the start tag
    Real posEnd = TextFind(txtInp, tagEnd, posSub);
    Case(
      Or(And(EQ(posIni,1),LE(posEnd,0)), // Starts at 1 but never ends
         And(EQ(posIni,1),EQ(posEnd+lenEnd-1,lenTxt))), // Exactly start..end
         SetOfText(txtInp),
      Or(And(GT(posIni,1),LE(posEnd,0)), // Starts after 1 but never ends
         And(GT(posIni,1),EQ(posEnd+lenEnd-1,lenTxt))), // Ends at the very end
         SetOfText(
           Sub(txtInp, 1,      posIni-1),
           Sub(txtInp, posIni, lenTxt)),
      And(EQ(posIni,1),GT(posEnd,1)), // Starts at 1, ends and text follows
        SetOfText(
          Sub(txtInp, 1,       posEnd+lenEnd-1)) <<
          TxtSplitBy2Tag(Sub(txtInp, posEnd+lenEnd, lenTxt), tagIni, tagEnd),
      TRUE,                           // Starts after 1, ends and text follows
        SetOfText(
          Sub(txtInp, 1,       posIni-1),
          Sub(txtInp, posIni,  posEnd+lenEnd-1)) <<
          TxtSplitBy2Tag(Sub(txtInp, posEnd+lenEnd, lenTxt), tagIni, tagEnd))
  })
};
//////////////////////////////////////////////////////////////////////////////
// Description corrected: "por un dos tags" -> "por dos tags", the parameter
// is tagEnd (it said tabEnd) and the example output starts with aaa (it
// said aaav).
PutDescription(
"Retorna un conjunto de textos resultado de cortar el texto de entrada por
dos tags de inicio y de fin incluyendo dichos tags al inicio y al final de
cada texto que enmarcan.
Los tags tagIni y tagEnd no han de ser nulos.
Tiene mas sentido cuando a cada tag de inicio le corresponde uno de final.
Es una funcion recursiva.
Si funciona correctamente si los tags inicial y final son iguales.
TxtSplitBy2Tag(aaa<::>bbb<--->ccc, <, >) -> [aaa, <::>, bbb, <--->, ccc].",
TxtSplitBy2Tag);
//////////////////////////////////////////////////////////////////////////////

Set TxtTokenizer() de Ink.Watercolor

//////////////////////////////////////////////////////////////////////////////
// TxtTokenizer: breaks txtInp by the token tagBrk, which may be longer than
// one character. Every occurrence of tagBrk is replaced by Char(7) and the
// result is split with Tokenizer() on that character.
//////////////////////////////////////////////////////////////////////////////
Set TxtTokenizer(Text txtInp, // Input text
                 Text tagBrk) // Tag used to cut
//////////////////////////////////////////////////////////////////////////////
{
  Text brkChr = Char(7);                         // Internal cut character
  Text txtOne = Replace(txtInp, tagBrk, brkChr); // tagBrk -> one character
  Tokenizer(txtOne, brkChr)
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Returns a set breaking the input text txtInp by the token tagBrk.
The lenght of the token tagBrk can be 1 or more characters. This function use
the Tol function Tokenizer() that breaks by only one character.
This function assume that txtInp do not contain the character 7 (bell).",
TxtTokenizer);
//////////////////////////////////////////////////////////////////////////////

Set TxtTokenizer() de Dct.Writer

//////////////////////////////////////////////////////////////////////////////
// TxtTokenizer: cuts txtInp by the single tag tagBrk, which is not kept in
// the resulting texts. The tag is first replaced by Char(7) and the text is
// then split with Tokenizer() on that character.
//////////////////////////////////////////////////////////////////////////////
Set TxtTokenizer(Text txtInp, // Input text
                 Text tagBrk) // Tag used to cut
//////////////////////////////////////////////////////////////////////////////
{
  Text cutChr = Char(7);                         // Internal cut character
  Text txtAux = Replace(txtInp, tagBrk, cutChr); // tagBrk -> one character
  Tokenizer(txtAux, cutChr)
};
//////////////////////////////////////////////////////////////////////////////
PutDescription(
"Retorna un conjunto de textos resultado de cortar el texto de entrada por un
unico tag tagBrk no incluyendo el tag tagBrk dentro de los textos.
Se soporta en la funcion Tol Tokenizer() que rompe por un unico caracter.
Usa como caracter interno de corte el 7 (bell), esperando que no aparezca.",
TxtTokenizer);
//////////////////////////////////////////////////////////////////////////////

Secciones de la página

Funciones

Artículos del sitio

Por categorías