Text-CSV_XS/CSV_XS.xs
/* Copyright (c) 2007-2024 H.Merijn Brand. All rights reserved.
* Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
* This program is free software; you can redistribute it and/or
* modify it under the same terms as Perl itself.
*/
#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
#define DPPP_PL_parser_NO_DUMMY
#define NEED_utf8_to_uvchr_buf
#define NEED_my_snprintf
#define NEED_pv_escape
#define NEED_pv_pretty
#ifndef PERLIO_F_UTF8
# define PERLIO_F_UTF8 0x00008000
# endif
#ifndef MAXINT
# define MAXINT ((int)(~(unsigned)0 >> 1))
# endif
#include "ppport.h"
#define is_utf8_sv(s) is_utf8_string ((U8 *)SvPV_nolen (s), SvCUR (s))
#define MAINT_DEBUG 0
#define BUFFER_SIZE 1024
#define CSV_XS_TYPE_WARN 1
#define CSV_XS_TYPE_PV 0
#define CSV_XS_TYPE_IV 1
#define CSV_XS_TYPE_NV 2
/* maximum length for EOL, SEP, and QUOTE - keep in sync with .pm */
#define MAX_ATTR_LEN 16
#define CSV_FLAGS_QUO 0x0001
#define CSV_FLAGS_BIN 0x0002
#define CSV_FLAGS_EIF 0x0004
#define CSV_FLAGS_MIS 0x0010
#define HOOK_ERROR 0x0001
#define HOOK_AFTER_PARSE 0x0002
#define HOOK_BEFORE_PRINT 0x0004
#ifdef __THW_370__
/* EBCDIC on os390 z/OS: IS_EBCDIC reads better than __THW_370__ */
#define IS_EBCDIC
#endif
#define CH_TAB '\t'
#define CH_NL '\n'
#define CH_CR '\r'
#define CH_SPACE ' '
#define CH_QUO '"'
#ifdef IS_EBCDIC
#define CH_DEL '\007'
static unsigned char ec, ebcdic2ascii[256] = {
0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f,
0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae,
0xac, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7,
0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
/* v this 0xa0 really should be 0xad. Needed for UTF = binary */
0x48, 0x49, 0xa0, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff,
0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
};
#define is_csv_binary(ch) ((((ec = ebcdic2ascii[ch]) < 0x20 || ec >= 0x7f) && ch != CH_TAB) || ch == EOF)
#else
#define CH_DEL '\177'
#define is_csv_binary(ch) ((ch < CH_SPACE || ch >= CH_DEL) && ch != CH_TAB)
#endif
#define CH_EOLX 1215
#define CH_EOL *csv->eol
#define CH_SEPX 8888
#define CH_SEP *csv->sep
#define CH_QUOTEX 8889
#define CH_QUOTE *csv->quo
#define useIO_EOF 0x10
#define unless(expr) if (!(expr))
#define _is_reftype(f,x) \
(f && ((SvGMAGICAL (f) && mg_get (f)) || 1) && SvROK (f) && SvTYPE (SvRV (f)) == x)
#define _is_arrayref(f) _is_reftype (f, SVt_PVAV)
#define _is_hashref(f) _is_reftype (f, SVt_PVHV)
#define _is_coderef(f) _is_reftype (f, SVt_PVCV)
#define SvSetUndef(sv) sv_setpvn (sv, NULL, 0)
#define SvSetEmpty(sv) sv_setpvn_mg (sv, "", 0)
#define CSV_XS_SELF \
if (!self || !SvOK (self) || !SvROK (self) || \
SvTYPE (SvRV (self)) != SVt_PVHV) \
croak ("self is not a hash ref"); \
hv = (HV *)SvRV (self)
/* Keep in sync with .pm! */
#define CACHE_ID_quote_char 0
#define CACHE_ID_escape_char 1
#define CACHE_ID_sep_char 2
#define CACHE_ID_binary 3
#define CACHE_ID_keep_meta_info 4
#define CACHE_ID_always_quote 5
#define CACHE_ID_allow_loose_quotes 6
#define CACHE_ID_allow_loose_escapes 7
#define CACHE_ID_allow_unquoted_escape 8
#define CACHE_ID_allow_whitespace 9
#define CACHE_ID_blank_is_undef 10
#define CACHE_ID_sep 39
#define CACHE_ID_sep_len 38
#define CACHE_ID_eol 11
#define CACHE_ID_eol_len 12
#define CACHE_ID_eol_is_cr 13
#define CACHE_ID_quo 15
#define CACHE_ID_quo_len 16
#define CACHE_ID_verbatim 22
#define CACHE_ID_empty_is_undef 23
#define CACHE_ID_auto_diag 24
#define CACHE_ID_quote_space 25
#define CACHE_ID_quote_empty 37
#define CACHE_ID__is_bound 26
#define CACHE_ID__has_ahead 30
#define CACHE_ID_escape_null 31
#define CACHE_ID_quote_binary 32
#define CACHE_ID_diag_verbose 33
#define CACHE_ID_has_error_input 34
#define CACHE_ID_decode_utf8 35
#define CACHE_ID__has_hooks 36
#define CACHE_ID_formula 38
#define CACHE_ID_strict 42
#define CACHE_ID_skip_empty_rows 43
#define CACHE_ID_undef_str 46
#define CACHE_ID_comment_str 54
#define CACHE_ID_types 62
#define byte unsigned char
#define ulng unsigned long
typedef struct {
byte quote_char;
byte escape_char;
byte fld_idx;
byte binary;
byte keep_meta_info;
byte always_quote;
byte useIO; /* Also used to indicate EOF */
byte eol_is_cr;
byte allow_loose_quotes;
byte allow_loose_escapes;
byte allow_unquoted_escape;
byte allow_whitespace;
byte blank_is_undef;
byte empty_is_undef;
byte verbatim;
byte auto_diag;
byte quote_space;
byte escape_null;
byte quote_binary;
byte first_safe_char;
byte diag_verbose;
byte has_error_input;
byte decode_utf8;
byte has_hooks;
byte quote_empty;
byte formula;
byte utf8;
byte has_ahead;
byte eolx;
byte strict;
short strict_n;
byte skip_empty_rows;
long is_bound;
ulng recno;
byte * cache;
SV * pself; /* PL_self, for error_diag */
HV * self;
SV * bound;
char * types;
byte eol_len;
byte sep_len;
byte quo_len;
byte types_len;
char * bptr;
SV * tmp;
byte undef_flg;
byte * undef_str;
byte * comment_str;
int eol_pos;
STRLEN size;
STRLEN used;
byte eol[MAX_ATTR_LEN];
byte sep[MAX_ATTR_LEN];
byte quo[MAX_ATTR_LEN];
char buffer[BUFFER_SIZE];
} csv_t;
#define bool_opt_def(o,d) \
(((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvTRUE (*svp) : d)
#define bool_opt(o) bool_opt_def (o, 0)
#define num_opt_def(o,d) \
(((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvIV (*svp) : d)
#define num_opt(o) num_opt_def (o, 0)
typedef struct {
int xs_errno;
char *xs_errstr;
} xs_error_t;
static const xs_error_t xs_errors[] = {
/* Generic errors */
{ 1000, "INI - constructor failed" },
{ 1001, "INI - sep_char is equal to quote_char or escape_char" },
{ 1002, "INI - allow_whitespace with escape_char or quote_char SP or TAB" },
{ 1003, "INI - \\r or \\n in main attr not allowed" },
{ 1004, "INI - callbacks should be undef or a hashref" },
{ 1005, "INI - EOL too long" },
{ 1006, "INI - SEP too long" },
{ 1007, "INI - QUOTE too long" },
{ 1008, "INI - SEP undefined" },
{ 1010, "INI - the header is empty" },
{ 1011, "INI - the header contains more than one valid separator" },
{ 1012, "INI - the header contains an empty field" },
{ 1013, "INI - the header contains nun-unique fields" },
{ 1014, "INI - header called on undefined stream" },
/* Syntax errors */
{ 1500, "PRM - Invalid/unsupported argument(s)" },
{ 1501, "PRM - The key attribute is passed as an unsupported type" },
{ 1502, "PRM - The value attribute is passed without the key attribute" },
{ 1503, "PRM - The value attribute is passed as an unsupported type" },
/* Parse errors */
{ 2010, "ECR - QUO char inside quotes followed by CR not part of EOL" },
{ 2011, "ECR - Characters after end of quoted field" },
{ 2012, "EOF - End of data in parsing input stream" },
{ 2013, "ESP - Specification error for fragments RFC7111" },
{ 2014, "ENF - Inconsistent number of fields" },
{ 2015, "ERW - Empty row" },
/* EIQ - Error Inside Quotes */
{ 2021, "EIQ - NL char inside quotes, binary off" },
{ 2022, "EIQ - CR char inside quotes, binary off" },
{ 2023, "EIQ - QUO character not allowed" },
{ 2024, "EIQ - EOF cannot be escaped, not even inside quotes" },
{ 2025, "EIQ - Loose unescaped escape" },
{ 2026, "EIQ - Binary character inside quoted field, binary off" },
{ 2027, "EIQ - Quoted field not terminated" },
/* EIF - Error Inside Field */
{ 2030, "EIF - NL char inside unquoted verbatim, binary off" },
{ 2031, "EIF - CR char is first char of field, not part of EOL" },
{ 2032, "EIF - CR char inside unquoted, not part of EOL" },
{ 2034, "EIF - Loose unescaped quote" },
{ 2035, "EIF - Escaped EOF in unquoted field" },
{ 2036, "EIF - ESC error" },
{ 2037, "EIF - Binary character in unquoted field, binary off" },
/* Combine errors */
{ 2110, "ECB - Binary character in Combine, binary off" },
/* IO errors */
{ 2200, "EIO - print to IO failed. See errno" },
/* Hash-Ref errors */
{ 3001, "EHR - Unsupported syntax for column_names ()" },
{ 3002, "EHR - getline_hr () called before column_names ()" },
{ 3003, "EHR - bind_columns () and column_names () fields count mismatch" },
{ 3004, "EHR - bind_columns () only accepts refs to scalars" },
{ 3006, "EHR - bind_columns () did not pass enough refs for parsed fields" },
{ 3007, "EHR - bind_columns needs refs to writable scalars" },
{ 3008, "EHR - unexpected error in bound fields" },
{ 3009, "EHR - print_hr () called before column_names ()" },
{ 3010, "EHR - print_hr () called with invalid arguments" },
{ 4001, "PRM - The key does not exist as field in the data" },
{ 5001, "PRM - The result does not match the output to append to" },
{ 5002, "PRM - Unsupported output" },
{ 0, "" },
};
static int last_error = 0;
static SV *m_getline, *m_print;
#define is_EOL(c) (c == CH_EOLX)
#define __is_SEPX(c) (c == CH_SEP && (csv->sep_len == 0 || (\
csv->size - csv->used >= (STRLEN)csv->sep_len - 1 &&\
!memcmp (csv->bptr + csv->used, csv->sep + 1, csv->sep_len - 1) &&\
(csv->used += csv->sep_len - 1) &&\
(c = CH_SEPX))))
#if MAINT_DEBUG > 1
static byte _is_SEPX (unsigned int c, csv_t *csv, int line) {
unsigned int b = __is_SEPX (c);
(void)fprintf (stderr, "# %4d - is_SEPX:\t%d (%d)\n", line, b, csv->sep_len);
if (csv->sep_len)
(void)fprintf (stderr,
"# len: %d, siz: %d, usd: %d, c: %03x, *sep: %03x\n",
csv->sep_len, csv->size, csv->used, c, CH_SEP);
return b;
} /* _is_SEPX */
#define is_SEP(c) _is_SEPX (c, csv, __LINE__)
#else
#define is_SEP(c) __is_SEPX (c)
#endif
#define __is_QUOTEX(c) (CH_QUOTE && c == CH_QUOTE && (csv->quo_len == 0 || (\
csv->size - csv->used >= (STRLEN)csv->quo_len - 1 &&\
!memcmp (csv->bptr + csv->used, csv->quo + 1, csv->quo_len - 1) &&\
(csv->used += csv->quo_len - 1) &&\
(c = CH_QUOTEX))))
#if MAINT_DEBUG > 1
static byte _is_QUOTEX (unsigned int c, csv_t *csv, int line) {
unsigned int b = __is_QUOTEX (c);
(void)fprintf (stderr, "# %4d - is_QUOTEX:\t%d (%d)\n", line, b, csv->quo_len);
if (csv->quo_len)
(void)fprintf (stderr,
"# len: %d, siz: %d, usd: %d, c: %03x, *quo: %03x\n",
csv->quo_len, csv->size, csv->used, c, CH_QUOTE);
return b;
} /* _is_QUOTEX */
#define is_QUOTE(c) _is_QUOTEX (c, csv, __LINE__)
#else
#define is_QUOTE(c) __is_QUOTEX (c)
#endif
#define is_whitespace(ch) \
( (ch) != CH_SEP && \
(ch) != CH_QUOTE && \
(ch) != csv->escape_char && \
( (ch) == CH_SPACE || \
(ch) == CH_TAB \
) \
)
#define SvDiag(xse) cx_SvDiag (aTHX_ xse)
static SV *cx_SvDiag (pTHX_ int xse) {
int i = 0;
SV *err;
while (xs_errors[i].xs_errno && xs_errors[i].xs_errno != xse) i++;
if ((err = newSVpv (xs_errors[i].xs_errstr, 0))) {
(void)SvUPGRADE (err, SVt_PVIV);
SvIV_set (err, xse);
SvIOK_on (err);
}
return (err);
} /* SvDiag */
/* This function should be altered to deal with the optional extra argument
* that holds the replacement message */
#define SetDiag(csv,xse) cx_SetDiag (aTHX_ csv, xse)
static SV *cx_SetDiag (pTHX_ csv_t *csv, int xse) {
dSP;
SV *err = SvDiag (xse);
SV *pself = csv->pself;
last_error = xse;
(void)hv_store (csv->self, "_ERROR_DIAG", 11, err, 0);
if (xse == 0) {
(void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (0), 0);
(void)hv_store (csv->self, "_ERROR_FLD", 10, newSViv (0), 0);
(void)hv_store (csv->self, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
csv->has_error_input = 0;
}
if (xse == 2012) /* EOF */
(void)hv_store (csv->self, "_EOF", 4, &PL_sv_yes, 0);
if (csv->auto_diag) {
unless (_is_hashref (pself))
pself = newRV_inc ((SV *)csv->self);
ENTER;
PUSHMARK (SP);
XPUSHs (pself);
PUTBACK;
call_pv ("Text::CSV_XS::error_diag", G_VOID | G_DISCARD);
LEAVE;
unless (pself == csv->pself)
sv_free (pself);
}
return (err);
} /* SetDiag */
#define xs_cache_set(hv,idx,val) cx_xs_cache_set (aTHX_ hv, idx, val)
static void cx_xs_cache_set (pTHX_ HV *hv, int idx, SV *val) {
SV **svp;
byte *cache;
csv_t csvs;
csv_t *csv = &csvs;
IV iv;
byte bv;
char *cp = "\0";
STRLEN len = 0;
unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp)
return;
cache = (byte *)SvPV_nolen (*svp);
(void)memcpy (csv, cache, sizeof (csv_t));
if (SvPOK (val))
cp = SvPV (val, len);
if (SvIOK (val))
iv = SvIV (val);
else if (SvNOK (val)) /* Needed for 5.6.x but safe for 5.8.x+ */
iv = (IV)SvNV (val); /* uncoverable statement ancient perl required */
else
iv = *cp;
bv = (unsigned)iv & 0xff;
switch (idx) {
/* single char/byte */
case CACHE_ID_sep_char:
CH_SEP = *cp;
csv->sep_len = 0;
break;
case CACHE_ID_quote_char:
CH_QUOTE = *cp;
csv->quo_len = 0;
break;
case CACHE_ID_escape_char: csv->escape_char = *cp; break;
/* boolean/numeric */
case CACHE_ID_binary: csv->binary = bv; break;
case CACHE_ID_keep_meta_info: csv->keep_meta_info = bv; break;
case CACHE_ID_always_quote: csv->always_quote = bv; break;
case CACHE_ID_quote_empty: csv->quote_empty = bv; break;
case CACHE_ID_quote_space: csv->quote_space = bv; break;
case CACHE_ID_escape_null: csv->escape_null = bv; break;
case CACHE_ID_quote_binary: csv->quote_binary = bv; break;
case CACHE_ID_decode_utf8: csv->decode_utf8 = bv; break;
case CACHE_ID_allow_loose_escapes: csv->allow_loose_escapes = bv; break;
case CACHE_ID_allow_loose_quotes: csv->allow_loose_quotes = bv; break;
case CACHE_ID_allow_unquoted_escape: csv->allow_unquoted_escape = bv; break;
case CACHE_ID_allow_whitespace: csv->allow_whitespace = bv; break;
case CACHE_ID_blank_is_undef: csv->blank_is_undef = bv; break;
case CACHE_ID_empty_is_undef: csv->empty_is_undef = bv; break;
case CACHE_ID_formula: csv->formula = bv; break;
case CACHE_ID_strict: csv->strict = bv; break;
case CACHE_ID_verbatim: csv->verbatim = bv; break;
case CACHE_ID_skip_empty_rows: csv->skip_empty_rows = bv; break;
case CACHE_ID_auto_diag: csv->auto_diag = bv; break;
case CACHE_ID_diag_verbose: csv->diag_verbose = bv; break;
case CACHE_ID__has_ahead: csv->has_ahead = bv; break;
case CACHE_ID__has_hooks: csv->has_hooks = bv; break;
case CACHE_ID_has_error_input: csv->has_error_input = bv; break;
/* a 4-byte IV */
case CACHE_ID__is_bound: csv->is_bound = iv; break;
/* string */
case CACHE_ID_sep:
(void)memcpy (csv->sep, cp, len);
csv->sep_len = len == 1 ? 0 : len;
break;
case CACHE_ID_quo:
(void)memcpy (csv->quo, cp, len);
csv->quo_len = len == 1 ? 0 : len;
break;
case CACHE_ID_eol:
(void)memcpy (csv->eol, cp, len);
csv->eol_len = len;
csv->eol_is_cr = len == 1 && *cp == CH_CR ? 1 : 0;
break;
case CACHE_ID_undef_str:
if (*cp) {
csv->undef_str = (byte *)cp;
if (SvUTF8 (val))
csv->undef_flg = 3;
}
else {
csv->undef_str = NULL;
csv->undef_flg = 0;
}
break;
case CACHE_ID_comment_str:
csv->comment_str = *cp ? (byte *)cp : NULL;
break;
case CACHE_ID_types:
if (cp && len) {
csv->types = cp;
csv->types_len = len;
}
else {
csv->types = NULL;
csv->types_len = 0;
}
break;
default:
warn ("Unknown cache index %d ignored\n", idx);
}
csv->cache = cache;
(void)memcpy (cache, csv, sizeof (csv_t));
} /* cache_set */
#define _pretty_strl(cp) cx_pretty_str (aTHX_ cp, strlen (cp))
#define _pretty_str(cp,xse) cx_pretty_str (aTHX_ cp, xse)
static char *cx_pretty_str (pTHX_ byte *s, STRLEN l) {
SV *dsv = newSVpvs_flags ("", SVs_TEMP);
return (pv_pretty (dsv, (char *)s, l, 0, NULL, NULL,
(PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT)));
} /* _pretty_str */
#define _pretty_sv(cp) cx_pretty_sv (aTHX_ cp)
static char *cx_pretty_sv (pTHX_ SV *sv) {
SV *dsv = newSVpvs_flags ("", SVs_TEMP);
if (SvOK (sv) && SvPOK (sv)) {
STRLEN l;
char *s = SvPV (sv, l);
return _pretty_str (s, l);
}
return ("");
} /* _pretty_sv */
#define _cache_show_byte(trim,c) \
warn (" %-21s %02x:%3d\n", trim, c, c)
#define _cache_show_char(trim,c) \
warn (" %-21s %02x:%s\n", trim, c, _pretty_str (&c, 1))
#define _cache_show_str(trim,l,str) \
warn (" %-21s %3d:%s\n", trim, l, _pretty_str (str, l))
#define _csv_diag(csv) _xs_csv_diag (aTHX_ csv)
static void _xs_csv_diag (pTHX_ csv_t *csv) {
warn ("CACHE:\n");
_cache_show_char ("quote_char", CH_QUOTE);
_cache_show_char ("escape_char", csv->escape_char);
_cache_show_char ("sep_char", CH_SEP);
_cache_show_byte ("binary", csv->binary);
_cache_show_byte ("decode_utf8", csv->decode_utf8);
_cache_show_byte ("allow_loose_escapes", csv->allow_loose_escapes);
_cache_show_byte ("allow_loose_quotes", csv->allow_loose_quotes);
_cache_show_byte ("allow_unquoted_escape", csv->allow_unquoted_escape);
_cache_show_byte ("allow_whitespace", csv->allow_whitespace);
_cache_show_byte ("always_quote", csv->always_quote);
_cache_show_byte ("quote_empty", csv->quote_empty);
_cache_show_byte ("quote_space", csv->quote_space);
_cache_show_byte ("escape_null", csv->escape_null);
_cache_show_byte ("quote_binary", csv->quote_binary);
_cache_show_byte ("auto_diag", csv->auto_diag);
_cache_show_byte ("diag_verbose", csv->diag_verbose);
_cache_show_byte ("formula", csv->formula);
_cache_show_byte ("strict", csv->strict);
_cache_show_byte ("strict_n", csv->strict_n);
_cache_show_byte ("skip_empty_rows", csv->skip_empty_rows);
_cache_show_byte ("has_error_input", csv->has_error_input);
_cache_show_byte ("blank_is_undef", csv->blank_is_undef);
_cache_show_byte ("empty_is_undef", csv->empty_is_undef);
_cache_show_byte ("has_ahead", csv->has_ahead);
_cache_show_byte ("keep_meta_info", csv->keep_meta_info);
_cache_show_byte ("verbatim", csv->verbatim);
_cache_show_byte ("useIO", csv->useIO);
_cache_show_byte ("has_hooks", csv->has_hooks);
_cache_show_byte ("eol_is_cr", csv->eol_is_cr);
_cache_show_byte ("eol_len", csv->eol_len);
_cache_show_str ("eol", csv->eol_len, csv->eol);
_cache_show_byte ("sep_len", csv->sep_len);
if (csv->sep_len > 1)
_cache_show_str ("sep", csv->sep_len, csv->sep);
_cache_show_byte ("quo_len", csv->quo_len);
if (csv->quo_len > 1)
_cache_show_str ("quote", csv->quo_len, csv->quo);
if (csv->types_len)
_cache_show_str ("types", csv->types_len, (byte *)csv->types);
else
_cache_show_str ("types", 0, (byte *)"");
if (csv->bptr)
_cache_show_str ("bptr", (int)strlen (csv->bptr), (byte *)csv->bptr);
if (csv->tmp && SvPOK (csv->tmp)) {
char *s = SvPV_nolen (csv->tmp);
_cache_show_str ("tmp", (int)strlen (s), (byte *)s);
}
if (csv->cache)
warn (" %-20s %4d:0x%08lx\n", "cache", (int)sizeof (csv_t), (unsigned long)csv->cache);
else
warn (" %-22s --:no cache yet\n", "cache");
} /* _csv_diag */
#define xs_cache_diag(hv) cx_xs_cache_diag (aTHX_ hv)
static void cx_xs_cache_diag (pTHX_ HV *hv) {
SV **svp;
byte *cache;
csv_t csvs;
csv_t *csv = &csvs;
unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp) {
warn ("CACHE: invalid\n");
return;
}
cache = (byte *)SvPV_nolen (*svp);
(void)memcpy (csv, cache, sizeof (csv_t));
_csv_diag (csv);
} /* xs_cache_diag */
#define set_eol_is_cr(csv) cx_set_eol_is_cr (aTHX_ csv)
static void cx_set_eol_is_cr (pTHX_ csv_t *csv) {
csv->eol[0] = CH_CR;
csv->eol_is_cr = 1;
csv->eol_len = 1;
(void)memcpy (csv->cache, csv, sizeof (csv_t));
(void)hv_store (csv->self, "eol", 3, newSVpvn ((char *)csv->eol, 1), 0);
} /* set_eol_is_cr */
#define SetupCsv(csv,self,pself) cx_SetupCsv (aTHX_ csv, self, pself)
static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself) {
SV **svp;
STRLEN len;
char *ptr;
last_error = 0;
if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
byte *cache = (byte *)SvPVX (*svp);
(void)memcpy (csv, cache, sizeof (csv_t));
}
else {
SV *sv_cache;
(void)memset (csv, 0, sizeof (csv_t)); /* Reset everything */
csv->self = self;
csv->pself = pself;
CH_SEP = ',';
if ((svp = hv_fetchs (self, "sep_char", FALSE)) && *svp && SvOK (*svp))
CH_SEP = *SvPV (*svp, len);
if ((svp = hv_fetchs (self, "sep", FALSE)) && *svp && SvOK (*svp)) {
ptr = SvPV (*svp, len);
(void)memcpy (csv->sep, ptr, len);
if (len > 1)
csv->sep_len = len;
}
CH_QUOTE = '"';
if ((svp = hv_fetchs (self, "quote_char", FALSE)) && *svp) {
if (SvOK (*svp)) {
ptr = SvPV (*svp, len);
CH_QUOTE = len ? *ptr : (char)0;
}
else
CH_QUOTE = (char)0;
}
if ((svp = hv_fetchs (self, "quote", FALSE)) && *svp && SvOK (*svp)) {
ptr = SvPV (*svp, len);
(void)memcpy (csv->quo, ptr, len);
if (len > 1)
csv->quo_len = len;
}
csv->escape_char = '"';
if ((svp = hv_fetchs (self, "escape_char", FALSE)) && *svp) {
if (SvOK (*svp)) {
ptr = SvPV (*svp, len);
csv->escape_char = len ? *ptr : (char)0;
}
else
csv->escape_char = (char)0;
}
if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
char *eol = SvPV (*svp, len);
(void)memcpy (csv->eol, eol, len);
csv->eol_len = len;
if (len == 1 && *csv->eol == CH_CR)
csv->eol_is_cr = 1;
}
csv->undef_flg = 0;
if ((svp = hv_fetchs (self, "undef_str", FALSE)) && *svp && SvOK (*svp)) {
/*if (sv && (SvOK (sv) || (
(SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv))))) {*/
csv->undef_str = (byte *)SvPV_nolen (*svp);
if (SvUTF8 (*svp))
csv->undef_flg = 3;
}
else
csv->undef_str = NULL;
if ((svp = hv_fetchs (self, "comment_str", FALSE)) && *svp && SvOK (*svp))
csv->comment_str = (byte *)SvPV_nolen (*svp);
else
csv->comment_str = NULL;
if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
csv->types = SvPV (*svp, len);
csv->types_len = len;
}
if ((svp = hv_fetchs (self, "_is_bound", FALSE)) && *svp && SvOK (*svp))
csv->is_bound = SvIV (*svp);
if ((svp = hv_fetchs (self, "callbacks", FALSE)) && _is_hashref (*svp)) {
HV *cb = (HV *)SvRV (*svp);
if ((svp = hv_fetchs (cb, "after_parse", FALSE)) && _is_coderef (*svp))
csv->has_hooks |= HOOK_AFTER_PARSE;
if ((svp = hv_fetchs (cb, "before_print", FALSE)) && _is_coderef (*svp))
csv->has_hooks |= HOOK_BEFORE_PRINT;
}
csv->binary = bool_opt ("binary");
csv->decode_utf8 = bool_opt ("decode_utf8");
csv->always_quote = bool_opt ("always_quote");
csv->strict = bool_opt ("strict");
csv->quote_empty = bool_opt ("quote_empty");
csv->quote_space = bool_opt_def ("quote_space", 1);
csv->escape_null = bool_opt_def ("escape_null", 1);
csv->quote_binary = bool_opt_def ("quote_binary", 1);
csv->allow_loose_quotes = bool_opt ("allow_loose_quotes");
csv->allow_loose_escapes = bool_opt ("allow_loose_escapes");
csv->allow_unquoted_escape = bool_opt ("allow_unquoted_escape");
csv->allow_whitespace = bool_opt ("allow_whitespace");
csv->blank_is_undef = bool_opt ("blank_is_undef");
csv->empty_is_undef = bool_opt ("empty_is_undef");
csv->verbatim = bool_opt ("verbatim");
csv->auto_diag = num_opt ("auto_diag");
csv->diag_verbose = num_opt ("diag_verbose");
csv->keep_meta_info = num_opt ("keep_meta_info");
csv->skip_empty_rows = num_opt ("skip_empty_rows");
csv->formula = num_opt ("formula");
unless (csv->escape_char) csv->escape_null = 0;
sv_cache = newSVpvn ((char *)csv, sizeof (csv_t));
csv->cache = (byte *)SvPVX (sv_cache);
SvREADONLY_on (sv_cache);
(void)memcpy (csv->cache, csv, sizeof (csv_t));
(void)hv_store (self, "_CACHE", 6, sv_cache, 0);
}
csv->utf8 = 0;
csv->size = 0;
csv->used = 0;
/* This is EBCDIC-safe, as it is used after translation */
csv->first_safe_char = csv->quote_space ? 0x21 : 0x20;
if (csv->is_bound) {
if ((svp = hv_fetchs (self, "_BOUND_COLUMNS", FALSE)) && _is_arrayref (*svp))
csv->bound = *svp;
else
csv->is_bound = 0;
}
csv->eol_pos = -1;
csv->eolx = csv->eol_len
? csv->verbatim || csv->eol_len >= 2
? 1
: csv->eol[0] == CH_CR || csv->eol[0] == CH_NL
? 0
: 1
: 0;
if (csv->sep_len > 1 && is_utf8_string ((U8 *)(csv->sep), csv->sep_len))
csv->utf8 = 1;
if (csv->quo_len > 1 && is_utf8_string ((U8 *)(csv->quo), csv->quo_len))
csv->utf8 = 1;
} /* SetupCsv */
#define Print(csv,dst) cx_Print (aTHX_ csv, dst)
static int cx_Print (pTHX_ csv_t *csv, SV *dst) {
int result;
int keep = 0;
if (csv->useIO) {
SV *tmp = newSVpvn_flags (csv->buffer, csv->used, SVs_TEMP);
dSP;
PUSHMARK (sp);
EXTEND (sp, 2);
PUSHs ((dst));
if (csv->utf8) {
STRLEN len;
char *ptr;
int j;
ptr = SvPV (tmp, len);
while (len > 0 && !is_utf8_sv (tmp) && keep < 16) {
ptr[--len] = (char)0;
SvCUR_set (tmp, len);
keep++;
}
for (j = 0; j < keep; j++)
csv->buffer[j] = csv->buffer[csv->used - keep + j];
SvUTF8_on (tmp);
}
PUSHs (tmp);
PUTBACK;
result = call_sv (m_print, G_METHOD);
SPAGAIN;
if (result) {
result = POPi;
unless (result)
(void)SetDiag (csv, 2200);
}
PUTBACK;
}
else {
sv_catpvn (SvRV (dst), csv->buffer, csv->used);
result = TRUE;
}
if (csv->utf8 && !csv->useIO && csv->decode_utf8
&& SvROK (dst) && is_utf8_sv (SvRV (dst)))
SvUTF8_on (SvRV (dst));
csv->used = keep;
return result;
} /* Print */
#define CSV_PUT(csv,dst,c) { \
if ((csv)->used == sizeof ((csv)->buffer) - 1) { \
unless (Print ((csv), (dst))) \
return FALSE; \
} \
(csv)->buffer[(csv)->used++] = (c); \
}
#define bound_field(csv,i,keep) cx_bound_field (aTHX_ csv, i, keep)
static SV *cx_bound_field (pTHX_ csv_t *csv, SSize_t i, int keep) {
SV *sv = csv->bound;
AV *av;
/* fprintf (stderr, "# New bind %d/%d\n", i, csv->is_bound);\ */
if (i >= csv->is_bound) {
(void)SetDiag (csv, 3006);
return (NULL);
}
if (sv && SvROK (sv)) {
av = (AV *)(SvRV (sv));
/* fprintf (stderr, "# Bind %d/%d/%d\n", i, csv->is_bound, av_len (av)); */
sv = *av_fetch (av, i, FALSE);
if (sv && SvROK (sv)) {
sv = SvRV (sv);
if (keep)
return (sv);
unless (SvREADONLY (sv)) {
SvSetEmpty (sv);
return (sv);
}
}
}
(void)SetDiag (csv, 3008);
return (NULL);
} /* bound_field */
#define was_quoted(mf,idx) cx_was_quoted (aTHX_ mf, idx)
static int cx_was_quoted (pTHX_ AV *mf, int idx) {
SV **x = av_fetch (mf, idx, FALSE);
return (x && SvIOK (*x) && SvIV (*x) & CSV_FLAGS_QUO ? 1 : 0);
} /* was_quoted */
#define _formula(csv,sv,len,f) cx_formula (aTHX_ csv, sv, len, f)
static char *cx_formula (pTHX_ csv_t *csv, SV *sv, STRLEN *len, int f) {
int fa = csv->formula;
if (fa == 1) die ("Formulas are forbidden\n");
if (fa == 2) croak ("Formulas are forbidden\n");
if (fa == 3) {
char *ptr = SvPV_nolen (sv);
char rec[40];
char field[128];
SV **svp;
if (csv->recno) (void)sprintf (rec, " in record %lu", csv->recno + 1);
else *rec = (char)0;
*field = (char)0;
if ((svp = hv_fetchs (csv->self, "_COLUMN_NAMES", FALSE)) && _is_arrayref (*svp)) {
AV *avp = (AV *)SvRV (*svp);
if (avp && av_len (avp) >= (f - 1)) {
SV **fnm = av_fetch (avp, f - 1, FALSE);
if (fnm && *fnm && SvOK (*fnm))
(void)sprintf (field, " (column: '%.100s')", SvPV_nolen (*fnm));
}
}
warn ("Field %d%s%s contains formula '%s'\n", f, field, rec, ptr);
return ptr;
}
if (len) *len = 0;
if (fa == 4) {
unless (SvREADONLY (sv)) SvSetEmpty (sv);
return "";
}
if (fa == 5) {
unless (SvREADONLY (sv)) SvSetUndef (sv);
return NULL;
}
if (fa == 6) {
int result;
SV **svp = hv_fetchs (csv->self, "_FORMULA_CB", FALSE);
if (svp && _is_coderef (*svp)) {
dSP;
ENTER;
SAVE_DEFSV; /* local $_ */
DEFSV = sv;
PUSHMARK (SP);
PUTBACK;
result = call_sv (*svp, G_SCALAR);
SPAGAIN;
if (result)
sv_setsv (sv, POPs);
PUTBACK;
LEAVE;
}
return len ? SvPV (sv, *len) : SvPV_nolen (sv);
}
/* So far undefined behavior */
return NULL;
} /* _formula */
#define SkipEmptyRow {\
int ser = csv->skip_empty_rows; \
\
if (ser == 3) { (void)SetDiag (csv, 2015); die ("Empty row"); } \
if (ser == 4) { (void)SetDiag (csv, 2015); croak ("Empty row"); } \
if (ser == 5) { (void)SetDiag (csv, 2015); return FALSE; } \
\
if (ser <= 2) { /* skip & eof */ \
csv->fld_idx = 0; \
c = CSV_GET; \
if (c == EOF || ser == 2) { \
sv_free (sv); \
sv = NULL; \
seenSomething = FALSE; \
if (ser == 2) return FALSE; \
break; \
} \
} \
\
if (ser == 6) { \
int result, n, i; \
SV *rv, **svp = hv_fetchs (csv->self, "_EMPTROW_CB", FALSE); \
AV *avp; \
unless (svp && _is_coderef (*svp)) \
return FALSE; /* A callback is wanted, but none found */ \
\
dSP; \
ENTER; \
SAVE_DEFSV; /* local $_ */ \
DEFSV = sv; \
PUSHMARK (SP); \
PUTBACK; \
result = call_sv (*svp, G_SCALAR); \
SPAGAIN; \
unless (result) { \
/* A false return will stop the parsing */ \
sv_free (sv); \
sv = NULL; \
waitingForField = 0; \
return FALSE; \
} \
\
PUTBACK; \
LEAVE; \
\
rv = POPs; \
/* Result should be a ref to a list. */ \
unless (_is_arrayref (rv)) \
return FALSE; \
\
avp = (AV *)SvRV (rv); \
\
unless (avp) return FALSE; \
n = av_len (avp); \
if (n <= 0) return TRUE; \
\
if (csv->is_bound && csv->is_bound < n) \
n = csv->is_bound - 1; \
\
for (i = 0; i <= n; i++) { \
SV **svp = av_fetch (avp, i, FALSE); \
sv = svp && *svp ? *svp : NULL; \
if (sv) { \
SvREFCNT_inc (sv); \
/* upgrade IV to IVPV if needed */ \
(void)SvPV_nolen (sv); \
} \
AV_PUSH; \
} \
return TRUE; \
} \
}
#define Combine(csv,dst,fields) cx_Combine (aTHX_ csv, dst, fields)
static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields) {
SSize_t i, n;
int bound = 0;
int aq = (int)csv->always_quote;
int qe = (int)csv->quote_empty;
int kmi = (int)csv->keep_meta_info;
AV *qm = NULL;
n = (IV)av_len (fields);
if (n < 0 && csv->is_bound) {
n = csv->is_bound - 1;
bound = 1;
}
if (kmi >= 10) {
SV **svp;
if ((svp = hv_fetchs (csv->self, "_FFLAGS", FALSE)) && _is_arrayref (*svp)) {
AV *avp = (AV *)SvRV (*svp);
if (avp && av_len (avp) >= n)
qm = avp;
}
}
for (i = 0; i <= n; i++) {
SV *sv;
STRLEN len = 0;
char *ptr = NULL;
if (i > 0) {
CSV_PUT (csv, dst, CH_SEP);
if (csv->sep_len) {
int x;
for (x = 1; x < (int)csv->sep_len; x++)
CSV_PUT (csv, dst, csv->sep[x]);
}
}
if (bound)
sv = bound_field (csv, i, 1);
else {
SV **svp = av_fetch (fields, i, FALSE);
sv = svp && *svp ? *svp : NULL;
}
if (sv && (SvOK (sv) || (
(SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv))))) {
int quoteMe;
ptr = SvPV (sv, len);
if (*ptr == '=' && csv->formula) {
unless (ptr = _formula (csv, sv, &len, i))
continue;
}
if (len == 0)
quoteMe = aq ? 1 : qe ? 1 : qm ? was_quoted (qm, i) : 0;
else {
if (SvUTF8 (sv)) {
csv->utf8 = 1;
csv->binary = 1;
}
quoteMe = aq ? 1 : qm ? was_quoted (qm, i) : 0;
/* Do we need quoting? We do quote, if the user requested
* (always_quote), if binary or blank characters are found
* and if the string contains quote or escape characters.
*/
if (!quoteMe &&
( quoteMe = (!SvIOK (sv) && !SvNOK (sv) && CH_QUOTE))) {
char *ptr2;
STRLEN l;
#if MAINT_DEBUG > 6
(void)fprintf (stderr, "# %04d Combine:\n", __LINE__);
sv_dump (sv);
#else
#if MAINT_DEBUG > 4
(void)fprintf (stderr, "# %04d Combine: '%s'\n", __LINE__, _pretty_sv (sv));
#endif
#endif
for (ptr2 = ptr, l = len; l; ++ptr2, --l) {
byte c = *ptr2;
#ifdef IS_EBCDIC
byte x = ebcdic2ascii[c];
#if MAINT_DEBUG > 4
(void)fprintf (stderr, " %02x", x);
#endif
#else
byte x = c;
#endif
if ((CH_QUOTE && c == CH_QUOTE) ||
(CH_SEP && c == CH_SEP) ||
(csv->escape_char && c == csv->escape_char) ||
(csv->quote_binary ? (x >= 0x7f && x <= 0xa0) ||
x < csv->first_safe_char
: c == CH_NL || c == CH_CR ||
(csv->quote_space && (
c == CH_SPACE || c == CH_TAB)))) {
/* Binary character */
break;
}
}
#if defined(IS_EBCDIC) && MAINT_DEBUG > 4
(void)fprintf (stderr, "\n");
#endif
quoteMe = (l > 0);
}
}
if (quoteMe) {
CSV_PUT (csv, dst, CH_QUOTE);
if (csv->quo_len) {
int x;
for (x = 1; x < (int)csv->quo_len; x++)
CSV_PUT (csv, dst, csv->quo[x]);
}
}
while (len-- > 0) {
char c = *ptr++;
int e = 0;
if (!csv->binary && is_csv_binary (c)) {
SvREFCNT_inc (sv);
csv->has_error_input = 1;
unless (hv_store (csv->self, "_ERROR_INPUT", 12, sv, 0))
SvREFCNT_dec (sv); /* uncoverable statement memory fail */
(void)SetDiag (csv, 2110);
return FALSE;
}
if (CH_QUOTE && (byte)c == CH_QUOTE && (csv->quo_len == 0 ||
memcmp (ptr, csv->quo +1, csv->quo_len - 1) == 0))
e = 1;
else
if (c == csv->escape_char && csv->escape_char)
e = 1;
else
if (c == (char)0 && csv->escape_null) {
e = 1;
c = '0';
}
if (e && csv->escape_char)
CSV_PUT (csv, dst, csv->escape_char);
CSV_PUT (csv, dst, c);
}
if (quoteMe) {
CSV_PUT (csv, dst, CH_QUOTE);
if (csv->quo_len) {
int x;
for (x = 1; x < (int)csv->quo_len; x++)
CSV_PUT (csv, dst, csv->quo[x]);
}
}
}
else {
if (csv->undef_str) {
byte *ptr = csv->undef_str;
STRLEN len = strlen ((char *)ptr);
if (csv->undef_flg) {
csv->utf8 = 1;
csv->binary = 1;
}
while (len--)
CSV_PUT (csv, dst, *ptr++);
}
}
}
if (csv->eol_len) {
STRLEN len = csv->eol_len;
byte *ptr = csv->eol;
while (len--)
CSV_PUT (csv, dst, *ptr++);
}
if (csv->used)
return Print (csv, dst);
return TRUE;
} /* Combine */
#define ParseError(csv,xse,pos) cx_ParseError (aTHX_ csv, xse, pos)
static void cx_ParseError (pTHX_ csv_t *csv, int xse, STRLEN pos) {
(void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (pos), 0);
(void)hv_store (csv->self, "_ERROR_FLD", 10, newSViv (csv->fld_idx), 0);
if (csv->tmp) {
csv->has_error_input = 1;
if (hv_store (csv->self, "_ERROR_INPUT", 12, csv->tmp, 0))
SvREFCNT_inc (csv->tmp);
}
(void)SetDiag (csv, xse);
} /* ParseError */
#define CsvGet(csv,src) cx_CsvGet (aTHX_ csv, src)
static int cx_CsvGet (pTHX_ csv_t *csv, SV *src) {
unless (csv->useIO)
return EOF;
if (csv->tmp && csv->eol_pos >= 0) {
csv->eol_pos = -2;
sv_setpvn (csv->tmp, (char *)csv->eol, csv->eol_len);
csv->bptr = SvPV (csv->tmp, csv->size);
csv->used = 0;
return CH_EOLX;
}
{ STRLEN result;
dSP;
PUSHMARK (sp);
EXTEND (sp, 1);
PUSHs (src);
PUTBACK;
result = call_sv (m_getline, G_METHOD);
SPAGAIN;
csv->eol_pos = -1;
csv->tmp = result ? POPs : NULL;
PUTBACK;
#if MAINT_DEBUG > 6
(void)fprintf (stderr, "# %04d getline () returned:\n", __LINE__);
sv_dump (csv->tmp);
#else
#if MAINT_DEBUG > 4
(void)fprintf (stderr, "# %04d getline () returned: '%s'\n", __LINE__, _pretty_sv (csv->tmp));
#endif
#endif
}
if (csv->tmp && SvOK (csv->tmp)) {
STRLEN tmp_len;
csv->bptr = SvPV (csv->tmp, tmp_len);
csv->used = 0;
csv->size = tmp_len;
if (csv->eolx && csv->size >= csv->eol_len) {
int i, match = 1;
for (i = 1; i <= (int)csv->eol_len; i++) {
unless (csv->bptr[csv->size - i] == csv->eol[csv->eol_len - i]) {
match = 0;
break;
}
}
if (match) {
#if MAINT_DEBUG > 4
(void)fprintf (stderr, "# %04d EOLX match, size: %d\n", __LINE__, csv->size);
#endif
csv->size -= csv->eol_len;
unless (csv->verbatim)
csv->eol_pos = csv->size;
csv->bptr[csv->size] = (char)0;
SvCUR_set (csv->tmp, csv->size);
unless (csv->verbatim || csv->size)
return CH_EOLX;
}
}
if (SvUTF8 (csv->tmp)) csv->utf8 = 1;
if (tmp_len)
return ((byte)csv->bptr[csv->used++]);
}
csv->useIO |= useIO_EOF;
return EOF;
} /* CsvGet */
#define ERROR_INSIDE_QUOTES(diag_code) { \
unless (csv->is_bound) SvREFCNT_dec (sv); \
ParseError (csv, diag_code, csv->used - 1); \
return FALSE; \
}
#define ERROR_INSIDE_FIELD(diag_code) { \
unless (csv->is_bound) SvREFCNT_dec (sv); \
ParseError (csv, diag_code, csv->used - 1); \
return FALSE; \
}
#if MAINT_DEBUG > 4
#define PUT_RPT (void)fprintf (stderr, "# %04d CSV_PUT: 0x%02x '%c'\n", __LINE__, c, isprint (c) ? c : '?')
#define PUT_SEPX_RPT1 (void)fprintf (stderr, "# %04d PUT SEPX\n", __LINE__)
#define PUT_SEPX_RPT2 (void)fprintf (stderr, "# %04d Done putting SEPX\n")
#define PUT_QUOX_RPT1 (void)fprintf (stderr, "# %04d PUT QUOX\n", __LINE__)
#define PUT_QUOX_RPT2 (void)fprintf (stderr, "# %04d Done putting QUOX\n")
#define PUT_EOLX_RPT1 (void)fprintf (stderr, "# %04d PUT EOLX\n", __LINE__)
#define PUT_EOLX_RPT2 (void)fprintf (stderr, "# %04d Done putting EOLX\n")
#if MAINT_DEBUG > 6
#define PUSH_RPT (void)fprintf (stderr, "# %04d AV_PUSHd\n", __LINE__); sv_dump (sv)
#else
#define PUSH_RPT (void)fprintf (stderr, "# %04d AV_PUSHd '%s'\n", __LINE__, _pretty_sv (sv))
#endif
#else
#define PUT_RPT
#define PUT_SEPX_RPT1
#define PUT_SEPX_RPT2
#define PUT_QUOX_RPT1
#define PUT_QUOX_RPT2
#define PUT_EOLX_RPT1
#define PUT_EOLX_RPT2
#define PUSH_RPT
#endif
#define CSV_PUT_SV1(c) { \
len = SvCUR ((sv)); \
SvGROW ((sv), len + 2); \
*SvEND ((sv)) = c; \
PUT_RPT; \
SvCUR_set ((sv), len + 1); \
}
#define CSV_PUT_SV(c) { \
if (c == CH_EOLX) { \
int x; PUT_EOLX_RPT1; \
if (csv->eol_pos == -2) \
csv->size = 0; \
for (x = 0; x < (int)csv->eol_len; x++) \
CSV_PUT_SV1 (csv->eol[x]); \
csv->eol_pos = -1; \
PUT_EOLX_RPT2; \
} \
else if (c == CH_SEPX) { \
int x; PUT_SEPX_RPT1; \
for (x = 0; x < (int)csv->sep_len; x++) \
CSV_PUT_SV1 (csv->sep[x]); \
PUT_SEPX_RPT2; \
} \
else if (c == CH_QUOTEX) { \
int x; PUT_QUOX_RPT1; \
for (x = 0; x < (int)csv->quo_len; x++) \
CSV_PUT_SV1 (csv->quo[x]); \
PUT_QUOX_RPT2; \
} \
else \
CSV_PUT_SV1 (c); \
}
#define CSV_GET1 \
(csv->used < csv->size ? (byte)csv->bptr[csv->used++] : CsvGet (csv, src))
#if MAINT_DEBUG > 3
int CSV_GET_ (pTHX_ csv_t *csv, SV *src, int l) {
int c;
(void)fprintf (stderr, "# %04d 1-CSV_GET: (used: %d, size: %d, eol_pos: %d, eolx = %d)\n", l, csv->used, csv->size, csv->eol_pos, csv->eolx);
c = CSV_GET1;
(void)fprintf (stderr, "# %04d 2-CSV_GET: 0x%02x '%c'\n", l, c, isprint (c) ? c : '?');
return (c);
} /* CSV_GET_ */
#define CSV_GET CSV_GET_ (aTHX_ csv, src, __LINE__)
#else
#define CSV_GET CSV_GET1
#endif
#define AV_PUSH { \
int svc; \
*SvEND (sv) = (char)0; \
svc = SvCUR (sv); \
SvUTF8_off (sv); \
if (svc && csv->formula && *(SvPV_nolen (sv)) == '=') \
(void)_formula (csv, sv, NULL, fnum); \
if (svc == 0 && ( \
csv->empty_is_undef || \
(!(f & CSV_FLAGS_QUO) && csv->blank_is_undef))) \
SvSetUndef (sv); \
else { \
if (csv->allow_whitespace && ! (f & CSV_FLAGS_QUO)) \
strip_trail_whitespace (sv); \
if (f & CSV_FLAGS_BIN && csv->decode_utf8 \
&& (csv->utf8 || is_utf8_sv (sv))) \
SvUTF8_on (sv); \
} \
SvSETMAGIC (sv); \
unless (csv->is_bound) av_push (fields, sv); \
PUSH_RPT; \
sv = NULL; \
if (csv->keep_meta_info && fflags) \
av_push (fflags, newSViv (f)); \
waitingForField = 1; \
}
#define strip_trail_whitespace(sv) cx_strip_trail_whitespace (aTHX_ sv)
static void cx_strip_trail_whitespace (pTHX_ SV *sv) {
STRLEN len;
char *s = SvPV (sv, len);
unless (s && len) return;
while (s[len - 1] == CH_SPACE || s[len - 1] == CH_TAB)
s[--len] = (char)0;
SvCUR_set (sv, len);
} /* strip_trail_whitespace */
#define NewField \
unless (sv) { \
if (csv->is_bound) \
sv = bound_field (csv, fnum, 0); \
else \
sv = newSVpvs (""); \
fnum++; \
unless (sv) return FALSE; \
f = 0; csv->fld_idx++; \
}
#if MAINT_DEBUG
static char str_parsed[40];
#endif
#if MAINT_DEBUG > 1
static char _sep[64];
static char *_sep_string (csv_t *csv) {
if (csv->sep_len) {
int x;
for (x = 0; x < csv->sep_len; x++)
(void)sprintf (_sep + x * x, "%02x ", csv->sep[x]);
}
else
(void)sprintf (_sep, "'%c' (0x%02x)", CH_SEP, CH_SEP);
return _sep;
} /* _sep_string */
#endif
#define Parse(csv,src,fields,fflags) cx_Parse (aTHX_ csv, src, fields, fflags)
static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags) {
int c, f = 0;
int waitingForField = 1;
SV *sv = NULL;
STRLEN len;
int seenSomething = FALSE;
int fnum = 0;
int spl = -1;
#if MAINT_DEBUG
(void)memset (str_parsed, 0, 40);
#endif
csv->fld_idx = 0;
while ((c = CSV_GET) != EOF) {
NewField;
seenSomething = TRUE;
spl++;
#if MAINT_DEBUG
if (spl < 39) str_parsed[spl] = c;
#endif
restart:
#if MAINT_DEBUG > 9
(void)fprintf (stderr, "# %04d at restart: %d/%d/%03x pos %d = 0x%02x\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
#endif
if (is_SEP (c)) {
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = SEP %s\t%s\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
_sep_string (csv), _pretty_strl (csv->bptr + csv->used));
#endif
if (waitingForField) {
/* ,1,"foo, 3",,bar,
* ^ ^
*/
if (csv->blank_is_undef || csv->empty_is_undef)
SvSetUndef (sv);
else
SvSetEmpty (sv);
unless (csv->is_bound)
av_push (fields, sv);
sv = NULL;
if (csv->keep_meta_info && fflags)
av_push (fflags, newSViv (f));
}
else
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo, 3",,bar,
* ^
*/
CSV_PUT_SV (c)
}
else {
/* ,1,"foo, 3",,bar,
* ^ ^ ^
*/
AV_PUSH;
}
} /* SEP char */
else
if (is_QUOTE (c)) {
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = QUO '%c'\t\t%s\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
_pretty_strl (csv->bptr + csv->used));
#endif
if (waitingForField) {
/* ,1,"foo, 3",,bar,\r\n
* ^
*/
f |= CSV_FLAGS_QUO;
waitingForField = 0;
continue;
}
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo, 3",,bar,\r\n
* ^
*/
int quoesc = 0;
int c2 = CSV_GET;
if (csv->allow_whitespace) {
/* , 1 , "foo, 3" , , bar , \r\n
* ^
*/
while (is_whitespace (c2)) {
if (csv->allow_loose_quotes &&
!(csv->escape_char && c2 == csv->escape_char)) {
/* This feels like a brittle fix for RT115953, where
* ["foo "bar" baz"] got parsed as [foo "bar"baz]
* when both allow_whitespace and allow_loose_quotes
* are true and escape does not equal quote
*/
CSV_PUT_SV (c);
c = c2;
}
c2 = CSV_GET;
}
}
if (is_SEP (c2)) {
/* ,1,"foo, 3",,bar,\r\n
* ^
*/
AV_PUSH;
continue;
}
if (c2 == CH_NL || c2 == CH_EOLX) {
/* ,1,"foo, 3",,"bar"\n
* ^
*/
AV_PUSH;
return TRUE;
}
/* ---
* if QUOTE eq ESCAPE
* AND ( c2 eq QUOTE 1,"abc""def",2
* OR c2 eq ESCAPE 1,"abc""def",2 (QUO eq ESC)
* OR c2 eq NULL ) 1,"abc"0def",2
* ---
*/
if (csv->escape_char && c == csv->escape_char) {
quoesc = 1;
if (c2 == '0') {
/* ,1,"foo, 3"056",,bar,\r\n
* ^
*/
CSV_PUT_SV (0)
continue;
}
if (is_QUOTE (c2)) {
/* ,1,"foo, 3""56",,bar,\r\n
* ^
*/
if (csv->utf8)
f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
continue;
}
if (csv->allow_loose_escapes && c2 != CH_CR) {
/* ,1,"foo, 3"56",,bar,\r\n
* ^
*/
CSV_PUT_SV (c);
c = c2;
goto restart;
}
}
if (c2 == CH_CR) {
int c3;
if (csv->eol_is_cr) {
/* ,1,"foo, 3"\r
* ^
*/
AV_PUSH;
return TRUE;
}
c3 = CSV_GET;
if (c3 == CH_NL) { /* \r is not optional before EOLX! */
/* ,1,"foo, 3"\r\n
* ^
*/
AV_PUSH;
return TRUE;
}
if (csv->useIO && csv->eol_len == 0) {
if (c3 == CH_CR) { /* \r followed by an empty line */
/* ,1,"foo, 3"\r\r
* ^
*/
set_eol_is_cr (csv);
goto EOLX;
}
if (!is_csv_binary (c3)) {
/* ,1,"foo\n 3",,"bar"\r
* baz,4
* ^
*/
set_eol_is_cr (csv);
csv->used--;
csv->has_ahead++;
AV_PUSH;
return TRUE;
}
}
ParseError (csv, quoesc ? 2023 : 2010, csv->used - 2);
return FALSE;
}
if (c2 == EOF) {
/* ,1,"foo, 3"
* ^
*/
AV_PUSH;
return TRUE;
}
if (csv->allow_loose_quotes && !quoesc) {
/* ,1,"foo, 3"456",,bar,\r\n
* ^
*/
CSV_PUT_SV (c);
c = c2;
goto restart;
}
/* 1,"foo" ",3
* ^
*/
if (quoesc) {
csv->used--;
ERROR_INSIDE_QUOTES (2023);
}
ERROR_INSIDE_QUOTES (2011);
}
/* !waitingForField, !InsideQuotes */
if (csv->allow_loose_quotes) { /* 1,foo "boo" d'uh,1 */
f |= CSV_FLAGS_EIF; /* Mark as error-in-field */
CSV_PUT_SV (c);
}
else
ERROR_INSIDE_FIELD (2034);
} /* QUO char */
else
if (c == csv->escape_char && csv->escape_char) {
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = ESC '%c'\t%s\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
_pretty_strl (csv->bptr + csv->used));
#endif
/* This means quote_char != escape_char */
if (waitingForField) {
waitingForField = 0;
if (csv->allow_unquoted_escape) {
/* The escape character is the first character of an
* unquoted field */
/* ... get and store next character */
int c2 = CSV_GET;
SvSetEmpty (sv);
if (c2 == EOF) {
csv->used--;
ERROR_INSIDE_FIELD (2035);
}
if (c2 == '0')
CSV_PUT_SV (0)
else
if ( is_QUOTE (c2) || is_SEP (c2) ||
c2 == csv->escape_char || csv->allow_loose_escapes) {
if (csv->utf8)
f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
}
else {
csv->used--;
ERROR_INSIDE_QUOTES (2025);
}
}
}
else
if (f & CSV_FLAGS_QUO) {
int c2 = CSV_GET;
if (c2 == EOF) {
csv->used--;
ERROR_INSIDE_QUOTES (2024);
}
if (c2 == '0')
CSV_PUT_SV (0)
else
if ( is_QUOTE (c2) || is_SEP (c2) ||
c2 == csv->escape_char || csv->allow_loose_escapes) {
if (csv->utf8)
f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
}
else {
csv->used--;
ERROR_INSIDE_QUOTES (2025);
}
}
else
if (sv) {
int c2 = CSV_GET;
if (c2 == EOF) {
csv->used--;
ERROR_INSIDE_FIELD (2035);
}
CSV_PUT_SV (c2);
}
else
ERROR_INSIDE_FIELD (2036); /* uncoverable statement I think there's no way to get here */
} /* ESC char */
else
if (c == CH_NL || is_EOL (c)) {
EOLX:
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = NL\t%s\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
_pretty_strl (csv->bptr + csv->used));
#endif
if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
SkipEmptyRow;
goto restart;
}
if (waitingForField) {
/* ,1,"foo, 3",,bar,
* ^
*/
if (csv->blank_is_undef || csv->empty_is_undef)
SvSetUndef (sv);
else
SvSetEmpty (sv);
unless (csv->is_bound)
av_push (fields, sv);
if (csv->keep_meta_info && fflags)
av_push (fflags, newSViv (f));
return TRUE;
}
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo\n 3",,bar,
* ^
*/
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2021);
CSV_PUT_SV (c);
}
else
if (csv->verbatim) {
/* ,1,foo\n 3,,bar,
* This feature should be deprecated
*/
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_FIELD (2030);
CSV_PUT_SV (c);
}
else {
/* sep=,
* ^
*/
if (csv->recno == 0 && csv->fld_idx == 1 && csv->useIO &&
(csv->bptr[0] == 's' || csv->bptr[0] == 'S') &&
(csv->bptr[1] == 'e' || csv->bptr[1] == 'E') &&
(csv->bptr[2] == 'p' || csv->bptr[2] == 'P') &&
csv->bptr[3] == '=') {
char *sep = csv->bptr + 4;
int lnu = csv->used - 5;
if (lnu <= MAX_ATTR_LEN) {
sep[lnu] = (char)0;
(void)memcpy (csv->sep, sep, lnu);
csv->sep_len = lnu == 1 ? 0 : lnu;
return Parse (csv, src, fields, fflags);
}
}
/* ,1,"foo\n 3",,bar
* ^
*/
AV_PUSH;
return TRUE;
}
} /* CH_NL */
else
if (c == CH_CR && !(csv->verbatim)) {
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = CR\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl);
#endif
if (waitingForField) {
int c2;
if (csv->eol_is_cr) {
/* ,1,"foo\n 3",,bar,\r
* ^
*/
c = CH_NL;
goto EOLX;
}
c2 = CSV_GET;
if (c2 == EOF) {
/* ,1,"foo\n 3",,bar,\r
* ^
*/
c = EOF;
#if MAINT_DEBUG > 9
(void)fprintf (stderr, "# %04d (%d) ... CR EOF 0x%x\n",
__LINE__, seenSomething, c);
#endif
unless (seenSomething)
break;
goto restart;
}
if (c2 == CH_NL) { /* \r is not optional before EOLX! */
/* ,1,"foo\n 3",,bar,\r\n
* ^
*/
c = c2;
goto EOLX;
}
if (csv->useIO && csv->eol_len == 0) {
if (c2 == CH_CR) { /* \r followed by an empty line */
/* ,1,"foo\n 3",,bar,\r\r
* ^
*/
set_eol_is_cr (csv);
goto EOLX;
}
waitingForField = 0;
if (!is_csv_binary (c2)) {
/* ,1,"foo\n 3",,bar,\r
* baz,4
* ^
*/
set_eol_is_cr (csv);
csv->used--;
csv->has_ahead++;
if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
SkipEmptyRow;
goto restart;
}
AV_PUSH;
return TRUE;
}
}
/* ,1,"foo\n 3",,bar,\r\t
* ^
*/
csv->used--;
ERROR_INSIDE_FIELD (2031);
}
if (f & CSV_FLAGS_QUO) {
/* ,1,"foo\r 3",,bar,\r\t
* ^
*/
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2022);
CSV_PUT_SV (c);
}
else {
int c2;
if (csv->eol_is_cr) {
/* ,1,"foo\n 3",,bar\r
* ^
*/
goto EOLX;
}
c2 = CSV_GET;
if (c2 == CH_NL) { /* \r is not optional before EOLX! */
/* ,1,"foo\n 3",,bar\r\n
* ^
*/
goto EOLX;
}
if (csv->useIO && csv->eol_len == 0) {
if (!is_csv_binary (c2)
/* ,1,"foo\n 3",,bar\r
* baz,4
* ^
*/
|| c2 == CH_CR) {
/* ,1,"foo\n 3",,bar,\r\r
* ^
*/
set_eol_is_cr (csv);
csv->used--;
csv->has_ahead++;
if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
SkipEmptyRow;
goto restart;
}
AV_PUSH;
return TRUE;
}
}
/* ,1,"foo\n 3",,bar\r\t
* ^
*/
ERROR_INSIDE_FIELD (2032);
}
} /* CH_CR */
else {
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = CCC '%c'\t\t%s\n",
__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
_pretty_strl (csv->bptr + csv->used));
#endif
/* Needed for non-IO parse, where EOL is not set during read */
if (csv->eolx && c == CH_EOL &&
csv->size - csv->used >= (STRLEN)csv->eol_len - 1 &&
!memcmp (csv->bptr + csv->used, csv->eol + 1, csv->eol_len - 1) &&
(csv->used += csv->eol_len - 1)) {
c = CH_EOLX;
#if MAINT_DEBUG > 5
(void)fprintf (stderr, "# %04d -> EOLX (0x%x)\n", __LINE__, c);
#endif
goto EOLX;
}
if (waitingForField) {
if (csv->comment_str && !f && !spl && c == *csv->comment_str) {
STRLEN cl = strlen ((char *)csv->comment_str);
#if MAINT_DEBUG > 5
(void)fprintf (stderr,
"# %04d COMMENT? cl = %d, size = %d, used = %d\n",
__LINE__, cl, csv->size, csv->used);
#endif
if (cl == 1 || (
(csv->size - csv->used >= cl - 1 &&
!memcmp (csv->bptr + csv->used, csv->comment_str + 1, cl - 1) &&
(csv->used += cl - 1)))) {
csv->used = csv->size;
csv->fld_idx = csv->strict_n ? csv->strict_n - 1 : 0;
c = CSV_GET;
seenSomething = FALSE;
#if MAINT_DEBUG > 5
(void)fprintf (stderr, "# %04d COMMENT, SKIPPED\n", __LINE__);
#endif
if (c == EOF)
break;
goto restart;
}
}
if (csv->allow_whitespace && is_whitespace (c)) {
do {
c = CSV_GET;
#if MAINT_DEBUG > 5
(void)fprintf (stderr, "# %04d WS next got (0x%x)\n", __LINE__, c);
#endif
} while (is_whitespace (c));
if (c == EOF)
break;
goto restart;
}
waitingForField = 0;
goto restart;
}
#if MAINT_DEBUG > 5
(void)fprintf (stderr, "# %04d %sc 0x%x is%s binary %s utf8\n",
__LINE__, f & CSV_FLAGS_QUO ? "quoted " : "", c,
is_csv_binary (c) ? "" : " not",
csv->utf8 ? "is" : "not");
#endif
if (f & CSV_FLAGS_QUO) {
if (is_csv_binary (c)) {
f |= CSV_FLAGS_BIN;
unless (csv->binary || csv->utf8)
ERROR_INSIDE_QUOTES (2026);
}
CSV_PUT_SV (c);
}
else {
if (is_csv_binary (c)) {
if (csv->useIO && c == EOF)
break;
f |= CSV_FLAGS_BIN;
unless (csv->binary || csv->utf8)
ERROR_INSIDE_FIELD (2037);
}
CSV_PUT_SV (c);
}
}
/* continue */
if (csv->verbatim && csv->useIO && csv->used == csv->size)
break;
}
if (waitingForField) {
if (seenSomething || !csv->useIO) {
NewField;
if (csv->blank_is_undef || csv->empty_is_undef)
SvSetUndef (sv);
else
SvSetEmpty (sv);
unless (csv->is_bound)
av_push (fields, sv);
if (csv->keep_meta_info && fflags)
av_push (fflags, newSViv (f));
return TRUE;
}
(void)SetDiag (csv, 2012);
return FALSE;
}
if (f & CSV_FLAGS_QUO)
ERROR_INSIDE_QUOTES (2027);
if (sv) {
AV_PUSH;
}
else if (f == 0 && fnum == 1 && csv->skip_empty_rows == 1)
return FALSE;
return TRUE;
} /* Parse */
static int hook (pTHX_ HV *hv, char *cb_name, AV *av) {
SV **svp;
HV *cb;
int res;
#if MAINT_DEBUG > 1
(void)fprintf (stderr, "# %04d HOOK %s %x\n", __LINE__, cb_name, av);
#endif
unless ((svp = hv_fetchs (hv, "callbacks", FALSE)) && _is_hashref (*svp))
return 0; /* uncoverable statement defensive programming */
cb = (HV *)SvRV (*svp);
svp = hv_fetch (cb, cb_name, strlen (cb_name), FALSE);
unless (svp && _is_coderef (*svp))
return 0;
{ dSP;
ENTER;
SAVETMPS;
PUSHMARK (SP);
mXPUSHs (newRV_inc ((SV *)hv));
mXPUSHs (newRV_inc ((SV *)av));
PUTBACK;
res = call_sv (*svp, G_SCALAR);
SPAGAIN;
if (res) {
SV *rv = POPs;
if (SvROK (rv) && (rv = SvRV (rv)) && SvPOK (rv)) {
if (strcmp (SvPV_nolen (rv), "skip") == 0)
res = 0;
}
}
PUTBACK;
FREETMPS;
LEAVE;
}
return res;
} /* hook */
#define c_xsParse(csv,hv,av,avf,src,useIO) cx_c_xsParse (aTHX_ csv, hv, av, avf, src, useIO)
static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
int result, ahead = 0;
SV *pos = NULL;
ENTER;
if (csv.eolx || csv.eol_is_cr) {
/* local $/ = $eol */
SAVEGENERICSV (PL_rs);
PL_rs = newSVpvn ((char *)csv.eol, csv.eol_len);
}
if ((csv.useIO = useIO)) {
csv.tmp = NULL;
if ((ahead = csv.has_ahead)) {
SV **svp;
if ((svp = hv_fetchs (hv, "_AHEAD", FALSE)) && *svp) {
csv.bptr = SvPV (csv.tmp = *svp, csv.size);
csv.used = 0;
if (pos && SvIV (pos) > (IV)csv.size)
sv_setiv (pos, SvIV (pos) - csv.size);
}
}
}
else {
csv.tmp = src;
csv.utf8 = SvUTF8 (src) ? 1 : 0;
csv.bptr = SvPV (src, csv.size);
}
if (csv.has_error_input) {
(void)hv_store (hv, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
csv.has_error_input = 0;
}
result = Parse (&csv, src, av, avf);
(void)hv_store (hv, "_RECNO", 6, newSViv (++csv.recno), 0);
(void)hv_store (hv, "_EOF", 4, &PL_sv_no, 0);
if (csv.strict) {
STRLEN nf = av_len (av);
#if MAINT_DEBUG > 6
(void)fprintf (stderr, "# %04d Strict nf = %2d, n = %2d, idx = %2d, recno = %2d, res = %d\n",
__LINE__, nf, csv.strict_n, csv.fld_idx, csv.recno, result);
#endif
if (nf && !csv.strict_n) csv.strict_n = (short)nf;
if (csv.strict_n > 0 && nf != csv.strict_n) {
unless (csv.useIO & useIO_EOF)
ParseError (&csv, 2014, csv.used);
if (last_error) /* an error callback can reset and accept */
result = FALSE;
}
}
if (csv.useIO) {
if (csv.tmp && csv.used < csv.size && csv.has_ahead) {
SV *sv = newSVpvn (csv.bptr + csv.used, csv.size - csv.used);
(void)hv_store (hv, "_AHEAD", 6, sv, 0);
}
else {
csv.has_ahead = 0;
if (csv.useIO & useIO_EOF)
(void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
}
/* csv.cache[CACHE_ID__has_ahead] = csv.has_ahead; */
(void)memcpy (csv.cache, &csv, sizeof (csv_t));
if (avf) {
if (csv.keep_meta_info)
(void)hv_store (hv, "_FFLAGS", 7, newRV_noinc ((SV *)avf), 0);
else {
av_undef (avf);
sv_free ((SV *)avf);
}
}
}
else { /* just copy to the cache */
SV **svp = hv_fetchs (hv, "_CACHE", FALSE);
if (svp && *svp)
csv.cache = (byte *)SvPV_nolen (*svp);
(void)memcpy (csv.cache, &csv, sizeof (csv_t));
}
if (result && csv.types) {
STRLEN i;
STRLEN len = av_len (av);
SV **svp;
for (i = 0; i <= len && i <= csv.types_len; i++) {
if ((svp = av_fetch (av, i, FALSE)) && *svp && SvOK (*svp)) {
switch (csv.types[i]) {
case CSV_XS_TYPE_IV:
#ifdef CSV_XS_TYPE_WARN
sv_setiv (*svp, SvIV (*svp));
#else
if (SvTRUE (*svp))
sv_setiv (*svp, SvIV (*svp));
else
sv_setiv (*svp, 0);
#endif
break;
case CSV_XS_TYPE_NV:
#ifdef CSV_XS_TYPE_WARN
sv_setnv (*svp, SvNV (*svp));
#else
if (SvTRUE (*svp))
sv_setnv (*svp, SvNV (*svp));
else
sv_setnv (*svp, 0.0);
#endif
break;
default:
break;
}
}
}
}
LEAVE;
return result;
} /* c_xsParse */
#define xsParse(self,hv,av,avf,src,useIO) cx_xsParse (aTHX_ self, hv, av, avf, src, useIO)
static int cx_xsParse (pTHX_ SV *self, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
csv_t csv;
int state;
SetupCsv (&csv, hv, self);
state = c_xsParse (csv, hv, av, avf, src, useIO);
if (state && csv.has_hooks & HOOK_AFTER_PARSE)
(void)hook (aTHX_ hv, "after_parse", av);
return (state || !last_error);
} /* xsParse */
/* API also offers av_clear and av_undef, but they have more overhead */
#define av_empty(av) cx_av_empty (aTHX_ av)
static void cx_av_empty (pTHX_ AV *av) {
while (av_len (av) >= 0)
sv_free (av_pop (av));
} /* av_empty */
#define xsParse_all(self,hv,io,off,len) cx_xsParse_all (aTHX_ self, hv, io, off, len)
static SV *cx_xsParse_all (pTHX_ SV *self, HV *hv, SV *io, SV *off, SV *len) {
csv_t csv;
int n = 0, skip = 0, length = MAXINT, tail = MAXINT;
AV *avr = newAV ();
AV *row = newAV ();
SetupCsv (&csv, hv, self);
if (SvOK (off)) {
skip = SvIV (off);
if (skip < 0) {
tail = -skip;
skip = -1;
}
}
if (SvOK (len))
length = SvIV (len);
while (c_xsParse (csv, hv, row, NULL, io, 1)) {
SetupCsv (&csv, hv, self);
if (skip > 0) {
skip--;
av_empty (row); /* re-use */
continue;
}
if (n++ >= tail) {
SvREFCNT_dec (av_shift (avr));
n--;
}
if (csv.has_hooks & HOOK_AFTER_PARSE) {
unless (hook (aTHX_ hv, "after_parse", row)) {
av_empty (row); /* re-use */
continue;
}
}
av_push (avr, newRV_noinc ((SV *)row));
if (n >= length && skip >= 0)
break; /* We have enough */
row = newAV ();
}
while (n > length) {
SvREFCNT_dec (av_pop (avr));
n--;
}
return (SV *)sv_2mortal (newRV_noinc ((SV *)avr));
} /* xsParse_all */
#define xsCombine(self,hv,av,io,useIO) cx_xsCombine (aTHX_ self, hv, av, io, useIO)
static int cx_xsCombine (pTHX_ SV *self, HV *hv, AV *av, SV *io, bool useIO) {
csv_t csv;
int result;
#if (PERL_BCDVERSION >= 0x5008000)
SV *ors = PL_ors_sv;
#endif
SetupCsv (&csv, hv, self);
csv.useIO = useIO;
#if (PERL_BCDVERSION >= 0x5008000)
if (*csv.eol)
PL_ors_sv = NULL;
#endif
if (useIO && csv.has_hooks & HOOK_BEFORE_PRINT)
(void)hook (aTHX_ hv, "before_print", av);
result = Combine (&csv, io, av);
#if (PERL_BCDVERSION >= 0x5008000)
PL_ors_sv = ors;
#endif
if (result && !useIO && csv.utf8)
sv_utf8_upgrade (io);
return result;
} /* xsCombine */
MODULE = Text::CSV_XS PACKAGE = Text::CSV_XS
PROTOTYPES: DISABLE
BOOT:
m_getline = newSVpvs ("getline");
m_print = newSVpvs ("print");
Perl_load_module (aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs ("IO::Handle"), NULL, NULL, NULL);
void
SetDiag (SV *self, int xse, ...)
PPCODE:
HV *hv;
csv_t csv;
if (SvOK (self) && SvROK (self)) {
CSV_XS_SELF;
SetupCsv (&csv, hv, self);
ST (0) = SetDiag (&csv, xse);
}
else {
last_error = xse;
ST (0) = sv_2mortal (SvDiag (xse));
}
if (xse && items > 2 && SvPOK (ST (2))) {
sv_setpvn (ST (0), SvPVX (ST (2)), SvCUR (ST (2)));
SvIOK_on (ST (0));
}
XSRETURN (1);
/* XS SetDiag */
void
error_input (SV *self)
PPCODE:
if (self && SvOK (self) && SvROK (self) && SvTYPE (SvRV (self)) == SVt_PVHV) {
HV *hv = (HV *)SvRV (self);
SV **sv = hv_fetchs (hv, "_ERROR_INPUT", FALSE);
if (SvOK (*sv))
ST (0) = *sv;
else
ST (0) = newSV (0);
}
else
ST (0) = newSV (0);
XSRETURN (1);
/* XS error_input */
void
Combine (SV *self, SV *dst, SV *fields, bool useIO)
PPCODE:
HV *hv;
AV *av;
CSV_XS_SELF;
av = (AV *)SvRV (fields);
ST (0) = xsCombine (self, hv, av, dst, useIO) ? &PL_sv_yes : &PL_sv_undef;
XSRETURN (1);
/* XS Combine */
void
Parse (SV *self, SV *src, SV *fields, SV *fflags)
PPCODE:
HV *hv;
AV *av;
AV *avf;
CSV_XS_SELF;
av = (AV *)SvRV (fields);
avf = (AV *)SvRV (fflags);
ST (0) = xsParse (self, hv, av, avf, src, 0) ? &PL_sv_yes : &PL_sv_no;
XSRETURN (1);
/* XS Parse */
void
print (SV *self, SV *io, SV *fields)
PPCODE:
HV *hv;
AV *av;
CSV_XS_SELF;
if (fields == &PL_sv_undef)
av = newAV ();
else {
unless (_is_arrayref (fields))
croak ("Expected fields to be an array ref");
av = (AV *)SvRV (fields);
}
ST (0) = xsCombine (self, hv, av, io, 1) ? &PL_sv_yes : &PL_sv_no;
XSRETURN (1);
/* XS print */
void
getline (SV *self, SV *io)
PPCODE:
HV *hv;
AV *av;
AV *avf;
CSV_XS_SELF;
av = newAV ();
avf = newAV ();
ST (0) = xsParse (self, hv, av, avf, io, 1)
? sv_2mortal (newRV_noinc ((SV *)av))
: &PL_sv_undef;
XSRETURN (1);
/* XS getline */
void
getline_all (SV *self, SV *io, ...)
PPCODE:
HV *hv;
SV *offset, *length;
CSV_XS_SELF;
offset = items > 2 ? ST (2) : &PL_sv_undef;
length = items > 3 ? ST (3) : &PL_sv_undef;
ST (0) = xsParse_all (self, hv, io, offset, length);
XSRETURN (1);
/* XS getline_all */
void
_cache_set (SV *self, int idx, SV *val)
PPCODE:
HV *hv;
CSV_XS_SELF;
xs_cache_set (hv, idx, val);
XSRETURN (1);
/* XS _cache_set */
void
_cache_diag (SV *self)
PPCODE:
HV *hv;
CSV_XS_SELF;
xs_cache_diag (hv);
XSRETURN (1);
/* XS _cache_diag */