Internals of Text Processing
generic text processing engine.
Support for text processing:
Define:
Functions:
1. char_kind_t Yap_wide_chtype(int ch):
< Other, not assigned
< Letter, uppercase
< Letter, lowercase
< Letter, titlecase
< Letter, modifier
< Letter, other
< Mark, nonspacing
< Mark, spacing combining
< Mark, enclosing
< Number, decimal digit
< Number, letter
< Number, other
< Punctuation, connector
< Punctuation, dash
< Punctuation, open
< Punctuation, close
< Punctuation, initial quote
< Punctuation, final quote
< Punctuation, other
< Symbol, math
< Symbol, currency
< Symbol, modifier
Unsure in YAP; let's assume a,c is treated as aç.
< Symbol, other
< Separator, space
< Separator, line
< Separator, paragraph
< Other, control
< Other, format
< Other, surrogate
< Other, private use
1. static char_kind_t chtype(Int ch):
1. const char * Yap_tokText(void *tokptr):
1. Term Yap_tokRep(void *tokptr): represent token *tokptr in string s; the maximum length is sz-1.
The conversion is based on the token type.
represent token *tokptr in string s, maxlength is sz-1
1. Term Yap_tokFullRep(void *tokptr):
1. static seq_type_t mod_to_type(int quote, Term mod USES_REGS):
1. static seq_type_t Yap_TextType(Term t):
1. unsigned char * Yap_readText(seq_tv_t *inp USES_REGS):
1. bool write_Text(unsigned char *inp, seq_tv_t *out USES_REGS):
1. bool Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS):
1. bool Yap_Concat_Text(int n, seq_tv_t inp[], seq_tv_t *out USES_REGS):
1. bool Yap_Splice_Text(int n, ssize_t cuts[], seq_tv_t *inp, seq_tv_t outv[] USES_REGS):
1. unsigned char * Yap_ListOfCodesToBuffer(unsigned char *buf, Term t, seq_tv_t *inp USES_REGS):
1. unsigned char * Yap_ListOfCharsToBuffer(unsigned char *buf, Term t, seq_tv_t *inp USES_REGS):
1. static Atom Yap_AtomicToLowAtom(Term t0 USES_REGS):
1. static Atom Yap_AtomicToUpAtom(Term t0 USES_REGS):
1. static Term Yap_AtomicToLowString(Term t0 USES_REGS):
1. static Term Yap_AtomicToUpString(Term t0 USES_REGS):
1. static Term Yap_AtomicToLowListOfCodes(Term t0 USES_REGS):
1. static Term Yap_AtomicToUpListOfCodes(Term t0 USES_REGS):
1. static Term Yap_AtomicToLowListOfAtoms(Term t0 USES_REGS):
1. static Term Yap_AtomicToUpListOfAtoms(Term t0 USES_REGS):
1. static size_t Yap_AtomicToUnicodeLength(Term t0 USES_REGS):
1. static Term Yap_AtomicToListOfAtoms(Term t0 USES_REGS):
1. static Term Yap_AtomicToListOfCodes(Term t0 USES_REGS):
1. static Atom Yap_AtomicToAtom(Term t0 USES_REGS):
1. static size_t Yap_AtomToLength(Term t0 USES_REGS):
1. static size_t Yap_AtomToUnicodeLength(Term t0 USES_REGS):
1. static Term Yap_AtomToListOfAtoms(Term t0 USES_REGS):
1. static Term Yap_AtomSWIToListOfAtoms(Term t0 USES_REGS):
1. static Term Yap_AtomToListOfCodes(Term t0 USES_REGS):
1. static Term Yap_AtomSWIToListOfCodes(Term t0 USES_REGS):
1. static Term Yap_AtomToNumber(Term t0 USES_REGS):
1. static Term Yap_AtomToString(Term t0 USES_REGS):
1. static Term Yap_AtomSWIToString(Term t0 USES_REGS):
1. static Term Yap_AtomicToString(Term t0 USES_REGS):
1. static wchar_t * Yap_AtomToWide(Atom at USES_REGS):
1. static Term Yap_AtomicToTBQ(Term t0, Term mod USES_REGS):
1. static Atom Yap_CharsToAtom(const char *s, encoding_t enc USES_REGS):
1. static Term Yap_CharsToListOfAtoms(const char *s, encoding_t enc USES_REGS):
1. static Term Yap_CharsToListOfCodes(const char *s, encoding_t enc USES_REGS):
1. static Term Yap_UTF8ToListOfCodes(const char *s USES_REGS):
1. static Atom Yap_UTF8ToAtom(const unsigned char *s USES_REGS):
1. static Term Yap_CharsToDiffListOfCodes(const char *s, Term tail, encoding_t enc USES_REGS):
1. static Term Yap_UTF8ToDiffListOfCodes(const unsigned char *s, Term tail USES_REGS):
1. static Term Yap_UTF8ToDiffListOfChars(const unsigned char *s, Term tail USES_REGS):
1. static Term Yap_WCharsToDiffListOfCodes(const wchar_t *s, Term tail USES_REGS):
1. static Term Yap_CharsToString(const char *s, encoding_t enc USES_REGS):
1. static char * Yap_AtomToUTF8Text(Atom at USES_REGS):
1. static Term Yap_QuotedToTerm(int quote, const char *s, Term mod, encoding_t enc USES_REGS):
1. static Atom Yap_ListOfAtomsToAtom(Term t0 USES_REGS):
1. static Term Yap_ListOfAtomsToNumber(Term t0 USES_REGS):
1. static Term Yap_ListOfAtomsToString(Term t0 USES_REGS):
1. static Atom Yap_ListOfCodesToAtom(Term t0 USES_REGS):
1. static Term Yap_ListOfCodesToNumber(Term t0 USES_REGS):
1. static Term Yap_ListOfCodesToString(Term t0 USES_REGS):
1. static Atom Yap_ListToAtom(Term t0 USES_REGS):
1. static Term Yap_ListToAtomic(Term t0 USES_REGS):
1. static Term Yap_ListToNumber(Term t0 USES_REGS):
1. static Term Yap_ListToString(Term t0 USES_REGS):
1. static Term Yap_ListSWIToString(Term t0 USES_REGS):
1. static Atom Yap_NCharsToAtom(const char *s, size_t len, encoding_t enc USES_REGS):
1. static Term Yap_CharsToDiffListOfAtoms(const char *s, encoding_t enc, Term tail USES_REGS):
1. static Term Yap_NCharsToListOfCodes(const char *s, size_t len, encoding_t enc USES_REGS):
1. static Term Yap_NCharsToString(const char *s, size_t len, encoding_t enc USES_REGS):
1. static Atom Yap_NumberToAtom(Term t0 USES_REGS):
1. static Term Yap_NumberToListOfAtoms(Term t0 USES_REGS):
1. static Term Yap_NumberToListOfCodes(Term t0 USES_REGS):
1. static Term Yap_NumberToString(Term t0 USES_REGS):
1. static Atom Yap_NWCharsToAtom(const wchar_t *s, size_t len USES_REGS):
1. static Term Yap_NWCharsToListOfAtoms(const wchar_t *s, size_t len USES_REGS):
1. static Term Yap_NWCharsToListOfCodes(const wchar_t *s, size_t len USES_REGS):
1. static Term Yap_NWCharsToString(const wchar_t *s, size_t len USES_REGS):
1. static Atom Yap_StringToAtom(Term t0 USES_REGS):
1. static Atom Yap_StringSWIToAtom(Term t0 USES_REGS):
1. static Term Yap_StringToAtomic(Term t0 USES_REGS):
1. static size_t Yap_StringToUnicodeLength(Term t0 USES_REGS):
1. static size_t Yap_StringToListOfAtoms(Term t0 USES_REGS):
1. static size_t Yap_StringSWIToListOfAtoms(Term t0 USES_REGS):
1. static size_t Yap_StringToListOfCodes(Term t0 USES_REGS):
1. static size_t Yap_StringSWIToListOfCodes(Term t0 USES_REGS):
1. static Term Yap_StringToNumber(Term t0 USES_REGS):
1. static Atom Yap_TextToAtom(Term t0 USES_REGS):
1. static Term Yap_TextToString(Term t0 USES_REGS):
1. static void Yap_OverwriteUTF8BufferToLowCase(void *buf USES_REGS):
1. static char * Yap_TextTermToText(Term t0 USES_REGS): Function to convert a generic text term (string, atom, list of codes, list of atoms) into a buffer.
t: the term
buf: the buffer; if NULL, a buffer is malloced and the user should reclaim it
len: the buffer size
enc: the encoding (UTF-8 is strongly recommended)
return:
the buffer, or NULL in case of failure. If so, Yap_Error may be called.
Note that it must be called from within a push-memory context.
1. static const unsigned char * Yap_TextToUTF8Buffer(Term t0 USES_REGS):
1. static Term Yap_UTF8ToString(const char *s USES_REGS):
1. static Atom UTF32ToAtom(const wchar_t *s USES_REGS):
1. static Term Yap_WCharsToListOfCodes(const wchar_t *s USES_REGS):
1. static Term Yap_WCharsToString(const wchar_t *s USES_REGS):
1. static Atom Yap_ConcatAtoms(Term t1, Term t2 USES_REGS):
1. static Atom Yap_ConcatAtomics(Term t1, Term t2 USES_REGS):
1. static Term Yap_ConcatStrings(Term t1, Term t2 USES_REGS):
1. static Atom Yap_SpliceAtom(Term t1, Atom ats[], size_t cut, size_t max USES_REGS):
1. static Atom Yap_SubtractHeadAtom(Term t1, Term th USES_REGS):
1. static Atom Yap_SubtractTailAtom(Term t1, Term th USES_REGS):
1. static Term Yap_SpliceString(Term t1, Term ts[], size_t cut, size_t max USES_REGS):
1. static Term Yap_SubtractHeadString(Term t1, Term th USES_REGS):
1. static Term Yap_SubtractTailString(Term t1, Term th USES_REGS):
1. static Term Yap_MkTextTerm(const char *s, seq_type_t guide USES_REGS): Convert from a text buffer (8-bit) to a term that has the same type as tguide.
s: the buffer
tguide: the guide
return:
the term
Var:
1. char_kind_t Yap_chtype0[][]:
Typedef:
1. typedef struct text_cvt seq_tv_t: