YAP 7.1.0
lineutils.yap
Go to the documentation of this file.
1/**
2 * @file lineutils.yap
3 * @author VITOR SANTOS COSTA <vsc@VITORs-MBP.lan>
4 * @date Tue Nov 17 22:02:22 2015
5 *
6 * @brief line text processing.
7 *
8 *
9*/
10
11:- module(lineutils,
13 search_for/3,
18 blank/3,
19 split/2,
20 split/3,
21 split/4,
22 split/5,
26 glue/3,
28 filter/1,
34 file_filter_with_initialization/5 as file_filter_with_init,
36 ]).
37
38/** @defgroup line_utils Line Manipulation Utilities
39@ingroup YAPLibrary
40@{
41
42This package provides a set of useful predicates to manipulate
43sequences of characters codes, usually first read in as a line. It is
44available by loading the
45```
46:- use_module(library(lineutils)).
47```
48
49
50*/
51
52:- meta_predicate
53 filter(v -2),
54 filter(+,+,2),
55 file_filter(+,+,2),
56 file_filter_with_initialization(+,+,2,+,:),
57 file_filter_with_start_end(+,+,2,2,2),
58 process(+,1).
59
60:- member/2append/3use_module(library(lists),
61 [,
62 ]).
63
64:- read_line_to_codes/2use_module(library(readutil),
65 []).
66
67/**
68 @pred search_for(+ _Char_,+ _Line_)
69 Search for a character _Char_ in the list of codes _Line_.
70*/
71search_for(C,L) :-
72 search_for(C, L, []).
73
74search_for(C) --> [C], .
75search_for(C) --> [_],
76 search_for(C).
77
78/** @pred scan_integer(? _Int_,+ _Line_,+ _RestOfLine_)
79
80Scan the list of codes _Line_ for an integer _Nat_, either a
81positive, zero, or negative integer, and unify _RestOfLine_ with
82the remainder of the line.
83*/
84scan_integer(N) -->
85 "-", scan_integer,
86 scan_natural(0, N0),
87 N is -N0.
88scan_integer(N) -->
89 scan_natural(0, N).
90
91/** @pred integer(? _Int_,+ _Line_,+ _RestOfLine_)
92
93Scan the list of codes _Line_ for an integer _Nat_, either a
94positive, zero, or negative integer, and unify _RestOfLine_ with
95the remainder of the line.
96*/
97integer(N) -->
98 "-", integer,
99 natural(0, N0),
100 N is -N0.
101vzinteger(N) -->
102 natural(0, N).
103
104/** @pred scan_natural(? _Nat_,+ _Line_,+ _RestOfLine_)
105
106Scan the list of codes _Line_ for a natural number _Nat_, zero
107or a positive integer, and unify _RestOfLine_ with the remainder
108of the line.
109*/
110scan_natural(N) -->
111 scan_natural(0, N).
112
113scan_natural(N0,N) -->
114 [C],
115 {C >= 0'0, C =< 0'9 }, ,
116 { N1 is N0*10+(C-0'0) }, %'
117 get_natural(N1,N).
118scan_natural(N,N) --> [].
119
120/** @pred natural(? _Nat_,+ _Line_,+ _RestOfLine_)
121
122Scan the list of codes _Line_ for a natural number _Nat_, zero
123or a positive integer, and unify _RestOfLine_ with the remainder
124of the line.
125*/
126natural(N) -->
127 natural(0, N).
128
129natural(N0,N) -->
130 [C],
131 {C >= 0'0, C =< 0'9 }, ,
132 { N1 is N0*10+(C-0'0) }, %'
133 get_natural(N1,N).
134natural(N,N) --> [].
135
136/** @pred skip_whitespace(+ _Line_,+ _RestOfLine_)
137
138Scan the list of codes _Line_ for white space, namely for tabbing and space characters.
139*/
140skip_whitespace([0' |Blanks]) -->
141 " ",
142 skip_whitespace( Blanks ).
143skip_whitespace([0' |Blanks]) -->
144 " ",
145 skip_whitespace( Blanks ).
146skip_whitespace( [] ) -->
147 skip_whitespace.
148
149/** @pred blank(+ _Line_,+ _RestOfLine_)
150
151 The list of codes _Line_ is formed by white space, namely by tabbing and space characters.
152*/
153blank([0' |Blanks]) -->
154 " ",
155 blank( Blanks ).
156blank([0' |Blanks]) -->
157 " ",
158 blank( Blanks ).
159blank( [] ) -->
160 [].
161
162
163/** @pred split(+ _Line_,- _Split_)
164
165Unify _Words_ with a set of strings obtained from _Line_ by
166using the blank characters as separators.
167*/
168split(String, Strings) :-
169 split_at_blank(" ", Strings, String, []).
170
171/** @pred split(+ _Line_,+ _Separators_,- _Split_)
172
173
174
175Unify _Words_ with a set of strings obtained from _Line_ by
176using the character codes in _Separators_ as separators. As an
177example, consider:
178
179```
180?- split("Hello * I am free"," *",S).
181
182S = ["Hello","I","am","free"] ?
183
184no
185```
186
187*/
188split(String, SplitCodes, Strings) :-
189 split_at_blank(SplitCodes, Strings, String, []).
190
191split_at_blank(SplitCodes, More) -->
192 [C],
193 { member(C, SplitCodes) }, !,
194 split_at_blank(SplitCodes, More).
195split_at_blank(SplitCodes, [[C|New]| More]) -->
196 [C], ,
197 split_(SplitCodes, New, More).
198split_at_blank(_, []) --> [].
199
200split_(SplitCodes, [], More) -->
201 [C],
202 { member(C, SplitCodes) }, !,
203 split_at_blank(SplitCodes, More).
204split_(SplitCodes, [C|New], Set) -->
205 [C], ,
206 split_(SplitCodes, New, Set).
207split_(_, [], []) --> [].
208
209
210split(Text, SplitCodes, DoubleQs, SingleQs, Strings) :-
211 split_element(SplitCodes, DoubleQs, SingleQs, Strings, Text, []).
212
213split_element(SplitCodes, DoubleQs, SingleQs, Strings) -->
214 [C],
215 ,
216 split_element(SplitCodes, DoubleQs, SingleQs, Strings, C).
217split_element(_SplitCodes, _DoubleQs, _SingleQs, []) --> split_element.
218split_element(_SplitCodes, _DoubleQs, _SingleQs, [[]]) --> [].
219
220split_element(SplitCodes, DoubleQs, SingleQs, Strings, C) -->
221 { member( C, SingleQs ) },
222 !,
223 [C2],
224 { Strings = [[C2|String]|More] },
225 split_element(SplitCodes, DoubleQs, SingleQs, [String| More]).
226split_element(SplitCodes, DoubleQs, SingleQs, [[]|Strings], C) -->
227 { member( C, SplitCodes ) },
228 !,
229 split_element(SplitCodes, DoubleQs, SingleQs, Strings).
230split_element(SplitCodes, DoubleQs, SingleQs, Strings, C) -->
231 { member( C, DoubleQs ) } ,
232 !,
233 split_within(SplitCodes, C-DoubleQs, SingleQs, Strings).
234split_element(SplitCodes, DoubleQs, SingleQs, [[C|String]|Strings], C) -->
235 split_element(SplitCodes, DoubleQs, SingleQs, [String|Strings]).
236
237split_within(SplitCodes, DoubleQs, SingleQs, Strings) -->
238 [C],
239 split_within(SplitCodes, DoubleQs, SingleQs, Strings, C).
240
241split_within(SplitCodes, DoubleQs, SingleQs, Strings, C) -->
242 { member( C, SingleQs ) },
243 !,
244 [C2],
245 { Strings = [[C2|String]|More] },
246 split_within(SplitCodes, DoubleQs, SingleQs, [String| More]).
247split_within(SplitCodes, DoubleQs, C-SingleQs, Strings, C) -->
248 split_within,
249 split_element(SplitCodes, DoubleQs, SingleQs, Strings).
250split_within(SplitCodes, DoubleQs, SingleQs, [[C|String]|Strings], C) -->
251 split_within(SplitCodes, DoubleQs, SingleQs, [String|Strings]).
252
253/** @pred split_unquoted(+ _Line_,+ _Separators_,- _Split_)
254
255
256
257Unify _Words_ with a set of strings obtained from _Line_ by
258using the character codes in _Separators_ as separators, but treat text wi
259thin double quotes as a single unit. As an
260example, consider:
261
262```
263?- split("Hello * I \"am free\""," *",S).
264
265S = ["Hello","I","am free"] ?
266
267no
268```
269
270*/
271split_unquoted(String, SplitCodes, Strings) :-
272 split_unquoted_at_blank(SplitCodes, Strings, String, []).
273
274/** @pred split_quoted(+ _Line_,+ _Separators_, GroupQuotes, SingleQuotes, - _Split_)
275
276
277
278Unify _Words_ with a set of strings obtained from _Line_ by
279using the character codes in _Separators_ as separators, but treat text within quotes as a single unit. As an
280example, consider:
281
282```
283?- split_quoted("Hello * I \"am free\""," *",S).
284
285S = ["Hello","I","am free"] ?
286
287no
288```
289
290*/
291%0'"%0'"/** @pred fields(+ _Line_,- _Split_)
292
293Unify _Words_ with a set of strings obtained from _Line_ by
294using the blank characters as field separators.
295
296*/
297fields(String, Strings) :-
298 fields(" ", Strings, String, []).
299
300/** @pred fields(+ _Line_,+ _Separators_,- _Split_)
301
302Unify _Words_ with a set of strings obtained from _Line_ by
303using the character codes in _Separators_ as separators for
304fields. If two separators occur in a row, the field is considered
305empty. As an example, consider:
306
307```
308?- fields("Hello I am free"," *",S).
309
310 S = ["Hello","","I","am","","free"] ?
311```
312*/
313fields(String, FieldsCodes, Strings) :-
314 dofields(FieldsCodes, First, More, String, []),
315 (
316 First = [], More = []
317 ->
318 Strings = []
319 ;
320 Strings = [First|More]
321 ).
322
323dofields(FieldsCodes, [], New.More) -->
324 [C],
325 { member(C, FieldsCodes) }, !,
326 dofields(FieldsCodes, New, More).
327dofields(FieldsCodes, [C|New], Set) -->
328 [C], ,
329 dofields(FieldsCodes, New, Set).
330dofields(_, [], []) --> [].
331
332/** @pred glue(+ _Words_,+ _Separator_,- _Line_)
333
334Unify _Line_ with string obtained by glueing _Words_ with
335the character code _Separator_.
336*/
337glue([], _, []).
338glue([A], _, A) :- glue.
339glue([H|T], [B|_], Merged) :-
340 append(H, [B|Rest], Merged),
341 glue(T, [B], Rest).
342
343/** @pred copy_line(+ _StreamInput_,+ _StreamOutput_)
344
345Copy a line from _StreamInput_ to _StreamOutput_.
346*/
347copy_line(StreamInp, StreamOut) :-
348 read_line_to_codes(StreamInp, Line),
349 format(StreamOut, '~s~n', [Line]).
350
351
352/** @pred filter(+ _StreamInp_, + _StreamOut_, + _Goal_)
353
354For every line _LineIn_ in stream _StreamInp_, execute
355`call(Goal,LineIn,LineOut)`, and output _LineOut_ to
356stream _StreamOut_. If `call(Goal,LineIn,LineOut)` fails,
357nothing will be output but execution continues with the next
358line. As an example, consider a procedure to select the second and
359fifth field of a CSV table :
360```
361select(Sep, In, Out) :-
362 fields(In, Sep, [_,F2,_,_,F5|_]),
363 fields(Out,Sep, [F2,F5]).
364
365select :-
366 filter(",",
367```
368
369*/
370filter(StreamInp, StreamOut, Command) :-
371 filter,
372 read_line_to_codes(StreamInp, Line),
373 (
374 Line == end_of_file
375 ->
376 !
377 ;
378 call(Command, Line, NewLine),
379 ground(NewLine),
380 format(StreamOut, '~s~n', [NewLine]),
381 format
382 ).
383
384filter(G) :-
385 filter(user_input, user_output, G).
386
387/** @pred process(+ _StreamInp_, + _Goal_) is meta
388
389For every line _LineIn_ in stream _StreamInp_, call
390`call(Goal,LineIn)`.
391*/
392process(StreamInp, Command) :-
393 process,
394 read_line_to_codes(StreamInp, Line),
395 (
396 Line == end_of_file
397 ->
398 !
399 ;
400 call(Command, Line),
401 call
402 ).
403
404
405/**
406 * @pred file_filter(+ _FileIn_, + _FileOut_, + _Goal_) is meta
407 *
408 * @param _FileIn_ File to process
409 * @param _FileOut_ Output file, often user_error
410 * @param _Goal_ to be metacalled, receives FileIn and FileOut as
411 * extra arguments
412 *
413 * @return succeeds
414
415 For every line _LineIn_ in file _FileIn_, execute
416 `call(Goal,LineIn,LineOut)`, and output _LineOut_ to file
417 _FileOut_.
418
419 The input stream is accessible through the alias `filter_input`, and
420 the output stream is accessible through `filter_output`.
421*/
422file_filter(Inp, Out, Command) :-
423 open(Inp, read, StreamInp, [alias(filter_input)]),
424 open(Out, write, StreamOut),
425 filter(StreamInp, StreamOut, Command),
426 close(StreamInp),
427 close(StreamOut).
428
429/** @pred file_filter_with_initialization(+ _FileIn_, + _FileOut_, + _Goal_, + _FormatCommand_, + _Arguments_)
430
431Same as file_filter/3, but before starting the filter execute
432`format/3` on the output stream, using _FormatCommand_ and
433 _Arguments_.
434*/
435file_filter_with_initialization(Inp, Out, Command, FormatString, Parameters) :-
436 open(Inp, read, StreamInp, [alias(filter_input)]),
437 open(Out, write, StreamOut, [alias(filter_output)]),
438 format(StreamOut, FormatString, Parameters),
439 filter(StreamInp, StreamOut, Command),
440 close(StreamInp),
441 close(StreamOut).
442
443
444/** @pred file_filter_with_start_end(+ FileIn, + FileOut, + Goal, + StartGoal, + EndGoal)
445
446Same as file_filter/3, but before starting the filter execute
447_StartGoal_, and call _ENdGoal_ as an epilog.
448
449The input stream are always accessible through `filter_output` and `filter_input`.
450*/
451file_filter_with_start_end(Inp, Out, Command, StartGoal, EndGoal) :-
452 open(Inp, read, StreamInp, [alias(filter_input)]),
453 open(Out, write, StreamOut, [alias(filter_output)]),
454 call( StartGoal, StreamInp, StreamOut ),
455 filter(StreamInp, StreamOut, Command),
456 call( EndGoal, StreamInp, StreamOut ),
457 close(StreamInp),
458 close(StreamOut).
459
460
461/**
462 * @pred file_select(+ _FileIn_, + _Goal_) is meta
463 *
464 * @param _FileIn_ File or Stream to process
465 * @param _Goal_ to be metacalled, receives FileIn as
466 * extra arguments
467 *
468 * @return bindings to arguments of _Goal_.
469 *
470 * @brief For every line _LineIn_ in file _FileIn_, execute
471 * `call(`Goal,LineIn)`.
472 *
473 * The input stream is accessible through the alias `filter_input`, and
474 * the output stream is accessible through `filter_output`.
475*/
476file_select(Inp, Command) :-
477 ( retract(alias(F)) -> retract ; F = '' ),
478 atom_concat(filter_input, F, Alias),
479 open(Inp, read, StreamInp, [Alias]),
480 atom_concat('_', F, NF),
481 assert( alias(NF) ),
482 assert,
483 read_line_to_codes(StreamInp, Line),
484 (
485 Line == end_of_file
486 ->
487 close(StreamInp),
488 retract(alias(NF)),
489 assert(alias(F)),
490 assert,
491 atom_concat(filter_input, F, Alias),
492 atom_concat
493 ;
494 call(Command, Line)
495 ).
496
497/**
498@}
499*/
500
close(+ S)
open(+ F,+ M,- S)
open(+ F,+ M,- S,+ Opts)
assert(+ C)
retract(+ C)
use_module( +Files )
ground( T)
integer( T)
process(+ StreamInp, + Goal)
read_line_to_codes( +_Stream_, -_String_)
copy_line(+ StreamInput,+ StreamOutput)
fields(+ Line,- Split)
fields(+ Line,+ Separators,- Split)
file_filter(+ FileIn, + FileOut, + Goal)
file_filter_with_initialization(+ FileIn, + FileOut, + Goal, + FormatCommand, + Arguments)
file_filter_with_start_end(+ FileIn, + FileOut, + Goal, + StartGoal, + EndGoal)
file_select(+ FileIn, + Goal)
filter(+ StreamInp, + StreamOut, + Goal)
glue(+ Words,+ Separator,- Line)
integer(? Int,+ Line,+ RestOfLine)
natural(? Nat,+ Line,+ RestOfLine)
scan_integer(? Int,+ Line,+ RestOfLine)
scan_natural(? Nat,+ Line,+ RestOfLine)
search_for(+ Char,+ Line)
split(+ Line,- Split)
split(+ Line,+ Separators,- Split)
split_unquoted(+ Line,+ Separators,- Split)
append(? List1,? List2,? List3)
member(?Element, ?Set) is true when Set is a list, and Element occurs in it