Group
Extension

Text-CSV_XS/CSV_XS.xs

/*  Copyright (c) 2007-2024 H.Merijn Brand.  All rights reserved.
 *  Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
 *  This program is free software; you can redistribute it and/or
 *  modify it under the same terms as Perl itself.
 */
#define PERL_NO_GET_CONTEXT
#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>
#define DPPP_PL_parser_NO_DUMMY
#define NEED_utf8_to_uvchr_buf
#define NEED_my_snprintf
#define NEED_pv_escape
#define NEED_pv_pretty
#ifndef PERLIO_F_UTF8
#  define PERLIO_F_UTF8	0x00008000
#  endif
#ifndef MAXINT
#  define MAXINT ((int)(~(unsigned)0 >> 1))
#  endif
#include "ppport.h"
#define is_utf8_sv(s) is_utf8_string ((U8 *)SvPV_nolen (s), SvCUR (s))

#define MAINT_DEBUG	0

#define BUFFER_SIZE	1024

#define CSV_XS_TYPE_WARN	1
#define CSV_XS_TYPE_PV		0
#define CSV_XS_TYPE_IV		1
#define CSV_XS_TYPE_NV		2

/* maximum length for EOL, SEP, and QUOTE - keep in sync with .pm */
#define MAX_ATTR_LEN	16

#define CSV_FLAGS_QUO		0x0001
#define CSV_FLAGS_BIN		0x0002
#define CSV_FLAGS_EIF		0x0004
#define CSV_FLAGS_MIS		0x0010

#define HOOK_ERROR		0x0001
#define HOOK_AFTER_PARSE	0x0002
#define HOOK_BEFORE_PRINT	0x0004

#ifdef __THW_370__
/* EBCDIC on os390 z/OS: IS_EBCDIC reads better than __THW_370__ */
#define IS_EBCDIC
#endif

#define CH_TAB		'\t'
#define CH_NL		'\n'
#define CH_CR		'\r'
#define CH_SPACE	' '
#define CH_QUO		'"'

#ifdef IS_EBCDIC
#define CH_DEL		'\007'
static unsigned char ec, ebcdic2ascii[256] = {
    0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f,
    0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
    0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
    0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
    0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
    0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
    0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
    0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
    0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
    0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
    0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
    0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae,
    0xac, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
    0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7,
    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /*          v this 0xa0 really should be 0xad. Needed for UTF = binary */
    0x48, 0x49, 0xa0, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
    0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff,
    0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
    };
#define is_csv_binary(ch) ((((ec = ebcdic2ascii[ch]) < 0x20 || ec >= 0x7f) && ch != CH_TAB) || ch == EOF)
#else
#define CH_DEL		'\177'
#define is_csv_binary(ch) ((ch < CH_SPACE || ch >= CH_DEL) && ch != CH_TAB)
#endif
#define CH_EOLX		1215
#define CH_EOL		*csv->eol
#define CH_SEPX		8888
#define CH_SEP		*csv->sep
#define CH_QUOTEX	8889
#define CH_QUOTE	*csv->quo

#define useIO_EOF	0x10

#define unless(expr)	if (!(expr))

#define _is_reftype(f,x) \
    (f && ((SvGMAGICAL (f) && mg_get (f)) || 1) && SvROK (f) && SvTYPE (SvRV (f)) == x)
#define _is_arrayref(f) _is_reftype (f, SVt_PVAV)
#define _is_hashref(f)  _is_reftype (f, SVt_PVHV)
#define _is_coderef(f)  _is_reftype (f, SVt_PVCV)

#define SvSetUndef(sv)	sv_setpvn    (sv, NULL, 0)
#define SvSetEmpty(sv)	sv_setpvn_mg (sv, "",   0)

#define CSV_XS_SELF					\
    if (!self || !SvOK (self) || !SvROK (self) ||	\
	 SvTYPE (SvRV (self)) != SVt_PVHV)		\
	croak ("self is not a hash ref");		\
    hv = (HV *)SvRV (self)

/* Keep in sync with .pm! */
/* Identifiers for the individual cached attributes. cx_xs_cache_set ()
 * switches on these values to decide which csv_t member to update.
 * NOTE(review): CACHE_ID_sep_len and CACHE_ID_formula are both 38. Only
 * CACHE_ID_formula is used as a case label in cx_xs_cache_set (), so this
 * compiles, but verify the numbering against the .pm before adding a case
 * for CACHE_ID_sep_len. */
#define CACHE_ID_quote_char		0
#define CACHE_ID_escape_char		1
#define CACHE_ID_sep_char		2
#define CACHE_ID_binary			3
#define CACHE_ID_keep_meta_info		4
#define CACHE_ID_always_quote		5
#define CACHE_ID_allow_loose_quotes	6
#define CACHE_ID_allow_loose_escapes	7
#define CACHE_ID_allow_unquoted_escape	8
#define CACHE_ID_allow_whitespace	9
#define CACHE_ID_blank_is_undef		10
#define CACHE_ID_sep			39
#define CACHE_ID_sep_len		38
#define CACHE_ID_eol			11
#define CACHE_ID_eol_len		12
#define CACHE_ID_eol_is_cr		13
#define CACHE_ID_quo			15
#define CACHE_ID_quo_len		16
#define CACHE_ID_verbatim		22
#define CACHE_ID_empty_is_undef		23
#define CACHE_ID_auto_diag		24
#define CACHE_ID_quote_space		25
#define CACHE_ID_quote_empty		37
#define CACHE_ID__is_bound		26
#define CACHE_ID__has_ahead		30
#define CACHE_ID_escape_null		31
#define CACHE_ID_quote_binary		32
#define CACHE_ID_diag_verbose		33
#define CACHE_ID_has_error_input	34
#define CACHE_ID_decode_utf8		35
#define CACHE_ID__has_hooks		36
#define CACHE_ID_formula		38
#define CACHE_ID_strict			42
#define CACHE_ID_skip_empty_rows	43
#define CACHE_ID_undef_str		46
#define CACHE_ID_comment_str		54
#define CACHE_ID_types			62

#define	byte	unsigned char
#define ulng	unsigned long
/* Complete state of a parser/combiner.
 *
 * The struct is copied as raw bytes (memcpy of sizeof (csv_t)) into and out
 * of the PV buffer of the object's "_CACHE" SV, so it has to stay a plain,
 * self-contained blob: pointer members are stored as-is and are not owned
 * by the struct.
 */
typedef struct {
    byte	quote_char;
    byte	escape_char;
    byte	fld_idx;
    byte	binary;

    byte	keep_meta_info;
    byte	always_quote;
    byte	useIO;		/* Also used to indicate EOF */
    byte	eol_is_cr;	/* set when eol is the single character CR */

    byte	allow_loose_quotes;
    byte	allow_loose_escapes;
    byte	allow_unquoted_escape;
    byte	allow_whitespace;

    byte	blank_is_undef;
    byte	empty_is_undef;
    byte	verbatim;
    byte	auto_diag;

    byte	quote_space;
    byte	escape_null;
    byte	quote_binary;
    byte	first_safe_char; /* 0x21 if quote_space, else 0x20 (SetupCsv) */

    byte	diag_verbose;
    byte	has_error_input;
    byte	decode_utf8;
    byte	has_hooks;	/* bitmask of HOOK_* */

    byte	quote_empty;
    byte	formula;
    byte	utf8;
    byte	has_ahead;

    byte	eolx;		/* derived from eol/verbatim in SetupCsv */
    byte	strict;
    short	strict_n;

    byte	skip_empty_rows;

    long	is_bound;
    ulng	recno;

    byte *	cache;		/* PV buffer of the "_CACHE" SV */

    SV *	pself;	/* PL_self, for error_diag */
    HV *	self;
    SV *	bound;		/* the "_BOUND_COLUMNS" entry, if an array ref */

    char *	types;		/* string buffer of the "_types" entry */

    byte	eol_len;
    byte	sep_len;	/* 0 for a single-byte sep, else its length */
    byte	quo_len;	/* 0 for a single-byte quote, else its length */
    byte	types_len;

    char *	bptr;
    SV *	tmp;
    byte	undef_flg;	/* set to 3 when undef_str has the UTF-8 flag */
    byte *	undef_str;
    byte *	comment_str;
    int		eol_pos;
    STRLEN	size;
    STRLEN	used;		/* number of bytes in use (see Print/CSV_PUT) */
    byte	eol[MAX_ATTR_LEN];
    byte	sep[MAX_ATTR_LEN];
    byte	quo[MAX_ATTR_LEN];
    char	buffer[BUFFER_SIZE];
    } csv_t;

#define bool_opt_def(o,d) \
    (((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvTRUE (*svp) : d)
#define bool_opt(o) bool_opt_def (o, 0)
#define num_opt_def(o,d) \
    (((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvIV   (*svp) : d)
#define num_opt(o)  num_opt_def  (o, 0)

/* One entry of the error table: numeric code plus its message */
typedef struct {
    int   xs_errno;
    char *xs_errstr;
    } xs_error_t;
/* Table of all known error codes and their messages.
 * The final entry has code 0 and an empty message: cx_SvDiag () stops its
 * search there, so an unknown code yields the empty message.
 * NOTE(review): "nun-unique" in 1013 looks like a typo for "non-unique".
 * It is left as is because the text is visible at runtime and callers or
 * tests may match on it - confirm before changing. */
static const xs_error_t xs_errors[] =  {

    /* Generic errors */
    { 1000, "INI - constructor failed"						},
    { 1001, "INI - sep_char is equal to quote_char or escape_char"		},
    { 1002, "INI - allow_whitespace with escape_char or quote_char SP or TAB"	},
    { 1003, "INI - \\r or \\n in main attr not allowed"				},
    { 1004, "INI - callbacks should be undef or a hashref"			},
    { 1005, "INI - EOL too long"						},
    { 1006, "INI - SEP too long"						},
    { 1007, "INI - QUOTE too long"						},
    { 1008, "INI - SEP undefined"						},

    { 1010, "INI - the header is empty"						},
    { 1011, "INI - the header contains more than one valid separator"		},
    { 1012, "INI - the header contains an empty field"				},
    { 1013, "INI - the header contains nun-unique fields"			},
    { 1014, "INI - header called on undefined stream"				},

    /* Syntax errors */
    { 1500, "PRM - Invalid/unsupported argument(s)"				},
    { 1501, "PRM - The key attribute is passed as an unsupported type"		},
    { 1502, "PRM - The value attribute is passed without the key attribute"	},
    { 1503, "PRM - The value attribute is passed as an unsupported type"	},

    /* Parse errors */
    { 2010, "ECR - QUO char inside quotes followed by CR not part of EOL"	},
    { 2011, "ECR - Characters after end of quoted field"			},
    { 2012, "EOF - End of data in parsing input stream"				},
    { 2013, "ESP - Specification error for fragments RFC7111"			},
    { 2014, "ENF - Inconsistent number of fields"				},
    { 2015, "ERW - Empty row"							},

    /*  EIQ - Error Inside Quotes */
    { 2021, "EIQ - NL char inside quotes, binary off"				},
    { 2022, "EIQ - CR char inside quotes, binary off"				},
    { 2023, "EIQ - QUO character not allowed"					},
    { 2024, "EIQ - EOF cannot be escaped, not even inside quotes"		},
    { 2025, "EIQ - Loose unescaped escape"					},
    { 2026, "EIQ - Binary character inside quoted field, binary off"		},
    { 2027, "EIQ - Quoted field not terminated"					},

    /* EIF - Error Inside Field */
    { 2030, "EIF - NL char inside unquoted verbatim, binary off"		},
    { 2031, "EIF - CR char is first char of field, not part of EOL"		},
    { 2032, "EIF - CR char inside unquoted, not part of EOL"			},
    { 2034, "EIF - Loose unescaped quote"					},
    { 2035, "EIF - Escaped EOF in unquoted field"				},
    { 2036, "EIF - ESC error"							},
    { 2037, "EIF - Binary character in unquoted field, binary off"		},

    /* Combine errors */
    { 2110, "ECB - Binary character in Combine, binary off"			},

    /* IO errors */
    { 2200, "EIO - print to IO failed. See errno"				},

    /* Hash-Ref errors */
    { 3001, "EHR - Unsupported syntax for column_names ()"			},
    { 3002, "EHR - getline_hr () called before column_names ()"			},
    { 3003, "EHR - bind_columns () and column_names () fields count mismatch"	},
    { 3004, "EHR - bind_columns () only accepts refs to scalars"		},
    { 3006, "EHR - bind_columns () did not pass enough refs for parsed fields"	},
    { 3007, "EHR - bind_columns needs refs to writable scalars"			},
    { 3008, "EHR - unexpected error in bound fields"				},
    { 3009, "EHR - print_hr () called before column_names ()"			},
    { 3010, "EHR - print_hr () called with invalid arguments"			},

    { 4001, "PRM - The key does not exist as field in the data"			},

    { 5001, "PRM - The result does not match the output to append to"		},
    { 5002, "PRM - Unsupported output"						},

    {    0, "" },
    };

static int last_error = 0;
static SV *m_getline, *m_print;

/* True when c is the marker value for a matched multi-byte EOL */
#define is_EOL(c) (c == CH_EOLX)

/* Test whether c starts the separator. All macros below expect a variable
 * `csv` (csv_t *) in scope.
 * For a single-byte separator (sep_len == 0) only the first byte is
 * compared. For a multi-byte separator the remaining sep_len - 1 bytes are
 * compared against the data at bptr + used; on a match the macro has side
 * effects: it advances csv->used past those bytes and assigns CH_SEPX to c.
 * The `used +=` term sits inside the && chain, so it relies on sep_len never
 * being 1 (the setters visible in this file store 0 for a 1-byte sep). */
#define __is_SEPX(c) (c == CH_SEP && (csv->sep_len == 0 || (\
    csv->size - csv->used >= (STRLEN)csv->sep_len - 1			&&\
    !memcmp (csv->bptr + csv->used, csv->sep + 1, csv->sep_len - 1)	&&\
    (csv->used += csv->sep_len - 1)					&&\
    (c = CH_SEPX))))
#if MAINT_DEBUG > 1
/* Debug wrapper: same test, but logs the outcome to stderr.
 * NOTE(review): c is passed by value here, so the CH_SEPX assignment does
 * not reach the caller in this debug build - confirm that is acceptable. */
static byte _is_SEPX (unsigned int c, csv_t *csv, int line) {
    unsigned int b = __is_SEPX (c);
    (void)fprintf (stderr, "# %4d - is_SEPX:\t%d (%d)\n", line, b, csv->sep_len);
    if (csv->sep_len)
	(void)fprintf (stderr,
	    "# len: %d, siz: %d, usd: %d, c: %03x, *sep: %03x\n",
	    csv->sep_len, csv->size, csv->used, c, CH_SEP);
    return b;
    } /* _is_SEPX */
#define is_SEP(c)  _is_SEPX (c, csv, __LINE__)
#else
#define is_SEP(c) __is_SEPX (c)
#endif

/* Same scheme as __is_SEPX, for the quote sequence. Additionally the test
 * fails right away when the first quote byte is NUL (quoting disabled).
 * On a multi-byte match csv->used is advanced and c becomes CH_QUOTEX. */
#define __is_QUOTEX(c) (CH_QUOTE && c == CH_QUOTE && (csv->quo_len == 0 || (\
    csv->size - csv->used >= (STRLEN)csv->quo_len - 1			&&\
    !memcmp (csv->bptr + csv->used, csv->quo + 1, csv->quo_len - 1)	&&\
    (csv->used += csv->quo_len - 1)					&&\
    (c = CH_QUOTEX))))
#if MAINT_DEBUG > 1
/* Debug wrapper around __is_QUOTEX that logs to stderr (c is by value) */
static byte _is_QUOTEX (unsigned int c, csv_t *csv, int line) {
    unsigned int b = __is_QUOTEX (c);
    (void)fprintf (stderr, "# %4d - is_QUOTEX:\t%d (%d)\n", line, b, csv->quo_len);

    if (csv->quo_len)
	(void)fprintf (stderr,
	    "# len: %d, siz: %d, usd: %d, c: %03x, *quo: %03x\n",
	    csv->quo_len, csv->size, csv->used, c, CH_QUOTE);
    return b;
    } /* _is_QUOTEX */
#define is_QUOTE(c)  _is_QUOTEX (c, csv, __LINE__)
#else
#define is_QUOTE(c) __is_QUOTEX (c)
#endif

/* A space or tab counts as whitespace only when it is not itself the
 * (first byte of the) separator, the quote or the escape character */
#define is_whitespace(ch) \
    ( (ch) != CH_SEP           && \
      (ch) != CH_QUOTE         && \
      (ch) != csv->escape_char && \
    ( (ch) == CH_SPACE || \
      (ch) == CH_TAB \
      ) \
    )

#define SvDiag(xse)		cx_SvDiag (aTHX_ xse)
/* Build a new SV for error code xse: the string part is the message from
 * xs_errors, the integer part is xse itself. A code that is not in the
 * table gets the message of the terminating entry (the empty string).
 */
static SV *cx_SvDiag (pTHX_ int xse) {
    const xs_error_t *ent;
    SV               *diag;

    /* Stop at the matching entry or at the code-0 terminator */
    for (ent = xs_errors; ent->xs_errno != 0; ent++) {
	if (ent->xs_errno == xse)
	    break;
	}

    diag = newSVpv (ent->xs_errstr, 0);
    unless (diag)
	return (diag);

    /* Make it carry both the message and the numeric code */
    (void)SvUPGRADE (diag, SVt_PVIV);
    SvIV_set (diag, xse);
    SvIOK_on (diag);
    return (diag);
    } /* SvDiag */

/* This function should be altered to deal with the optional extra argument
 * that holds the replacement message */
#define SetDiag(csv,xse)	cx_SetDiag (aTHX_ csv, xse)
/* Record error code xse on the object and return the diagnostics SV.
 *
 * - remembers xse in the file-scope last_error
 * - stores the SV built by SvDiag () under "_ERROR_DIAG" in csv->self
 * - xse == 0 additionally resets "_ERROR_POS", "_ERROR_FLD" and
 *   "_ERROR_INPUT" and clears csv->has_error_input
 * - xse == 2012 (EOF) sets "_EOF" to a true value
 * - with auto_diag set, calls Text::CSV_XS::error_diag with the object
 *
 * The returned SV is the one that was just stored in the hash.
 */
static SV *cx_SetDiag (pTHX_ csv_t *csv, int xse) {
    dSP;
    SV *err   = SvDiag (xse);
    SV *pself = csv->pself;

    last_error = xse;
	(void)hv_store (csv->self, "_ERROR_DIAG",  11, err,          0);
    if (xse == 0) {
	(void)hv_store (csv->self, "_ERROR_POS",   10, newSViv  (0), 0);
	(void)hv_store (csv->self, "_ERROR_FLD",   10, newSViv  (0), 0);
	(void)hv_store (csv->self, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
	csv->has_error_input = 0;
	}
    if (xse == 2012) /* EOF */
	(void)hv_store (csv->self, "_EOF",          4, &PL_sv_yes,   0);
    if (csv->auto_diag) {
	/* error_diag needs the object; if pself is not a hash ref,
	 * create a temporary reference to the hash for this call */
	unless (_is_hashref (pself))
	    pself = newRV_inc ((SV *)csv->self);
	ENTER;
	PUSHMARK (SP);
	XPUSHs (pself);
	PUTBACK;
	call_pv ("Text::CSV_XS::error_diag", G_VOID | G_DISCARD);
	LEAVE;
	/* Only the temporary reference created above is released */
	unless (pself == csv->pself)
	    sv_free (pself);
	}
    return (err);
    } /* SetDiag */

#define xs_cache_set(hv,idx,val)	cx_xs_cache_set (aTHX_ hv, idx, val)
/* Update a single attribute inside the object's "_CACHE" blob.
 *
 *  hv   the object hash that holds the "_CACHE" entry
 *  idx  one of the CACHE_ID_* constants
 *  val  the new value (string, integer or number)
 *
 * The blob is copied into a local csv_t, the selected member is changed and
 * the whole struct is copied back. Nothing happens when the hash has no
 * "_CACHE" entry. An unknown idx, or a sep/quote/eol that does not fit in
 * MAX_ATTR_LEN bytes, is reported with warn () and otherwise ignored.
 *
 * For undef_str, comment_str and types the cache keeps a pointer into the
 * string buffer of val; the string is not copied.
 */
static void cx_xs_cache_set (pTHX_ HV *hv, int idx, SV *val) {
    SV    **svp;
    byte   *cache;

    csv_t   csvs;
    csv_t  *csv = &csvs;

    IV      iv;
    byte    bv;
    char   *cp  = "\0";
    STRLEN  len = 0;

    unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp)
	return;

    cache = (byte *)SvPV_nolen (*svp);
    (void)memcpy (csv, cache, sizeof (csv_t));

    if (SvPOK (val))
	cp = SvPV (val, len);
    if (SvIOK (val))
	iv = SvIV (val);
    else if (SvNOK (val))	/* Needed for 5.6.x but safe for 5.8.x+ */
	iv = (IV)SvNV (val);	/* uncoverable statement ancient perl required */
    else
	iv = *cp;
    bv = (unsigned)iv & 0xff;

    /* sep, quo and eol are copied into fixed arrays of MAX_ATTR_LEN bytes.
     * The length is expected to be validated before we get here, but an
     * oversized value must never be allowed to write past those arrays */
    if (len > MAX_ATTR_LEN &&
	    (idx == CACHE_ID_sep || idx == CACHE_ID_quo || idx == CACHE_ID_eol)) {
	warn ("Cache value for index %d too long (%d > %d), ignored\n",
	    idx, (int)len, MAX_ATTR_LEN);
	return;
	}

    switch (idx) {

	/* single char/byte */
	case CACHE_ID_sep_char:
	    CH_SEP			= *cp;
	    csv->sep_len		= 0;
	    break;

	case CACHE_ID_quote_char:
	    CH_QUOTE			= *cp;
	    csv->quo_len		= 0;
	    break;

	case CACHE_ID_escape_char:           csv->escape_char           = *cp; break;

	/* boolean/numeric */
	case CACHE_ID_binary:                csv->binary                = bv; break;
	case CACHE_ID_keep_meta_info:        csv->keep_meta_info        = bv; break;
	case CACHE_ID_always_quote:          csv->always_quote          = bv; break;
	case CACHE_ID_quote_empty:           csv->quote_empty           = bv; break;
	case CACHE_ID_quote_space:           csv->quote_space           = bv; break;
	case CACHE_ID_escape_null:           csv->escape_null           = bv; break;
	case CACHE_ID_quote_binary:          csv->quote_binary          = bv; break;
	case CACHE_ID_decode_utf8:           csv->decode_utf8           = bv; break;
	case CACHE_ID_allow_loose_escapes:   csv->allow_loose_escapes   = bv; break;
	case CACHE_ID_allow_loose_quotes:    csv->allow_loose_quotes    = bv; break;
	case CACHE_ID_allow_unquoted_escape: csv->allow_unquoted_escape = bv; break;
	case CACHE_ID_allow_whitespace:      csv->allow_whitespace      = bv; break;
	case CACHE_ID_blank_is_undef:        csv->blank_is_undef        = bv; break;
	case CACHE_ID_empty_is_undef:        csv->empty_is_undef        = bv; break;
	case CACHE_ID_formula:               csv->formula               = bv; break;
	case CACHE_ID_strict:                csv->strict                = bv; break;
	case CACHE_ID_verbatim:              csv->verbatim              = bv; break;
	case CACHE_ID_skip_empty_rows:       csv->skip_empty_rows       = bv; break;
	case CACHE_ID_auto_diag:             csv->auto_diag             = bv; break;
	case CACHE_ID_diag_verbose:          csv->diag_verbose          = bv; break;
	case CACHE_ID__has_ahead:            csv->has_ahead             = bv; break;
	case CACHE_ID__has_hooks:            csv->has_hooks             = bv; break;
	case CACHE_ID_has_error_input:       csv->has_error_input       = bv; break;

	/* the full integer value, not just the low byte */
	case CACHE_ID__is_bound:             csv->is_bound              = iv; break;

	/* string */
	case CACHE_ID_sep:
	    (void)memcpy (csv->sep, cp, len);
	    /* a length of 0 marks a single-byte separator */
	    csv->sep_len = len == 1 ? 0 : len;
	    break;

	case CACHE_ID_quo:
	    (void)memcpy (csv->quo, cp, len);
	    /* a length of 0 marks a single-byte quote */
	    csv->quo_len = len == 1 ? 0 : len;
	    break;

	case CACHE_ID_eol:
	    (void)memcpy (csv->eol, cp, len);
	    csv->eol_len   = len;
	    csv->eol_is_cr = len == 1 && *cp == CH_CR ? 1 : 0;
	    break;

	case CACHE_ID_undef_str:
	    if (*cp) {
		csv->undef_str = (byte *)cp;
		/* Always recompute the flag, as SetupCsv does, so that
		 * replacing a UTF-8 undef_str by a plain one clears it */
		csv->undef_flg = SvUTF8 (val) ? 3 : 0;
		}
	    else {
		csv->undef_str = NULL;
		csv->undef_flg = 0;
		}
	    break;

	case CACHE_ID_comment_str:
	    csv->comment_str = *cp ? (byte *)cp : NULL;
	    break;

	case CACHE_ID_types:
	    if (cp && len) {
		csv->types     = cp;
		csv->types_len = len;
		}
	    else {
		csv->types     = NULL;
		csv->types_len = 0;
		}
	    break;

	default:
	    warn ("Unknown cache index %d ignored\n", idx);
	}

    csv->cache = cache;
    (void)memcpy (cache, csv, sizeof (csv_t));
    } /* cache_set */

#define _pretty_strl(cp)	cx_pretty_str (aTHX_ cp, strlen (cp))
#define _pretty_str(cp,xse)	cx_pretty_str (aTHX_ cp, xse)
/* Return a printable, escaped rendering of the l bytes at s, produced by
 * pv_pretty () into a scratch SV that is created with SVs_TEMP.
 */
static char *cx_pretty_str (pTHX_ byte *s, STRLEN l) {
    const U32  how     = PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT;
    SV        *scratch = newSVpvs_flags ("", SVs_TEMP);
    char      *pretty  = pv_pretty (scratch, (char *)s, l, 0, NULL, NULL, how);

    return (pretty);
    } /* _pretty_str */
#define _pretty_sv(cp)		cx_pretty_sv  (aTHX_ cp)
/* Return a printable rendering of the string held by sv, or "" when sv is
 * undefined or does not hold a string. The formatting itself (including
 * the scratch SV it needs) is left to _pretty_str ().
 */
static char *cx_pretty_sv (pTHX_ SV *sv) {
    if (SvOK (sv) && SvPOK (sv)) {
	STRLEN l;
	char *s = SvPV (sv, l);
	return _pretty_str ((byte *)s, l);
	}
    return ("");
    } /* _pretty_sv */

/* Helpers for the cache dump: each emits one line through warn ().
 * _cache_show_byte: label, then the value in hex and in decimal */
#define _cache_show_byte(trim,c) \
    warn ("  %-21s  %02x:%3d\n", trim, c, c)
/* _cache_show_char: label, hex value and the escaped character itself.
 * The argument must be an lvalue, as its address is taken */
#define _cache_show_char(trim,c) \
    warn ("  %-21s  %02x:%s\n",  trim, c, _pretty_str (&c, 1))
/* _cache_show_str: label, length l and the escaped first l bytes of str */
#define _cache_show_str(trim,l,str) \
    warn ("  %-21s %3d:%s\n",  trim, l, _pretty_str (str, l))

#define _csv_diag(csv)	_xs_csv_diag (aTHX_ csv)
/* Dump the content of *csv through warn (), one attribute per line.
 * Purely diagnostic: nothing in *csv is modified.
 */
static void _xs_csv_diag (pTHX_ csv_t *csv) {
    warn ("CACHE:\n");
    /* CH_QUOTE and CH_SEP are the first byte of the quo and sep arrays */
    _cache_show_char ("quote_char",		CH_QUOTE);
    _cache_show_char ("escape_char",		csv->escape_char);
    _cache_show_char ("sep_char",		CH_SEP);
    _cache_show_byte ("binary",			csv->binary);
    _cache_show_byte ("decode_utf8",		csv->decode_utf8);

    _cache_show_byte ("allow_loose_escapes",	csv->allow_loose_escapes);
    _cache_show_byte ("allow_loose_quotes",	csv->allow_loose_quotes);
    _cache_show_byte ("allow_unquoted_escape",	csv->allow_unquoted_escape);
    _cache_show_byte ("allow_whitespace",	csv->allow_whitespace);
    _cache_show_byte ("always_quote",		csv->always_quote);
    _cache_show_byte ("quote_empty",		csv->quote_empty);
    _cache_show_byte ("quote_space",		csv->quote_space);
    _cache_show_byte ("escape_null",		csv->escape_null);
    _cache_show_byte ("quote_binary",		csv->quote_binary);
    _cache_show_byte ("auto_diag",		csv->auto_diag);
    _cache_show_byte ("diag_verbose",		csv->diag_verbose);
    _cache_show_byte ("formula",		csv->formula);
    _cache_show_byte ("strict",			csv->strict);
    _cache_show_byte ("strict_n",		csv->strict_n);
    _cache_show_byte ("skip_empty_rows",	csv->skip_empty_rows);
    _cache_show_byte ("has_error_input",	csv->has_error_input);
    _cache_show_byte ("blank_is_undef",		csv->blank_is_undef);
    _cache_show_byte ("empty_is_undef",		csv->empty_is_undef);
    _cache_show_byte ("has_ahead",		csv->has_ahead);
    _cache_show_byte ("keep_meta_info",		csv->keep_meta_info);
    _cache_show_byte ("verbatim",		csv->verbatim);

    _cache_show_byte ("useIO",			csv->useIO);
    _cache_show_byte ("has_hooks",		csv->has_hooks);
    _cache_show_byte ("eol_is_cr",		csv->eol_is_cr);
    _cache_show_byte ("eol_len",		csv->eol_len);
    _cache_show_str  ("eol",      csv->eol_len,	csv->eol);
    _cache_show_byte ("sep_len",		csv->sep_len);
    /* the full sep/quote string is only shown for multi-byte values */
    if (csv->sep_len > 1)
	_cache_show_str ("sep",   csv->sep_len,	csv->sep);
    _cache_show_byte ("quo_len",		csv->quo_len);
    if (csv->quo_len > 1)
	_cache_show_str ("quote", csv->quo_len,	csv->quo);
    if (csv->types_len)
	_cache_show_str ("types", csv->types_len, (byte *)csv->types);
    else
	_cache_show_str ("types", 0, (byte *)"");

    /* bptr and tmp are measured with strlen (), so they are shown up to
     * the first NUL byte only */
    if (csv->bptr)
	_cache_show_str ("bptr", (int)strlen (csv->bptr), (byte *)csv->bptr);
    if (csv->tmp && SvPOK (csv->tmp)) {
	char *s = SvPV_nolen (csv->tmp);
	_cache_show_str ("tmp",  (int)strlen (s), (byte *)s);
	}
    if (csv->cache)
	warn ("  %-20s %4d:0x%08lx\n", "cache", (int)sizeof (csv_t), (unsigned long)csv->cache);
    else
	warn ("  %-22s --:no cache yet\n", "cache");
    } /* _csv_diag */

#define xs_cache_diag(hv)	cx_xs_cache_diag (aTHX_ hv)
/* Dump the "_CACHE" blob of the object hash hv through _csv_diag ().
 * Emits "CACHE: invalid" instead when the hash has no such entry.
 */
static void cx_xs_cache_diag (pTHX_ HV *hv) {
    SV   **slot = hv_fetchs (hv, "_CACHE", FALSE);
    csv_t  snapshot;

    if (!slot || !*slot) {
	warn ("CACHE: invalid\n");
	return;
	}

    /* Work on a private copy of the blob */
    (void)memcpy (&snapshot, (byte *)SvPV_nolen (*slot), sizeof (csv_t));
    _csv_diag (&snapshot);
    } /* xs_cache_diag */

#define set_eol_is_cr(csv)	cx_set_eol_is_cr (aTHX_ csv)
/* Switch the end-of-line sequence to a single CR: update *csv, copy the
 * struct to the cache blob and store the new "eol" in the object hash.
 */
static void cx_set_eol_is_cr (pTHX_ csv_t *csv) {
    SV *eol_sv;

    csv->eol_len   = 1;
    csv->eol_is_cr = 1;
    csv->eol[0]    = CH_CR;

    /* keep the cached copy in step with the live struct */
    (void)memcpy (csv->cache, csv, sizeof (csv_t));

    eol_sv = newSVpvn ((char *)csv->eol, 1);
    (void)hv_store (csv->self, "eol",  3, eol_sv, 0);
    } /* set_eol_is_cr */

#define SetupCsv(csv,self,pself)	cx_SetupCsv (aTHX_ csv, self, pself)
/* Initialise *csv for the object hash self.
 *
 * If the hash already has a "_CACHE" entry, *csv is simply loaded from that
 * blob. Otherwise *csv is built from the attributes in the hash and a new
 * read-only "_CACHE" SV holding a copy of the struct is stored in the hash.
 * In both cases the per-call members (utf8, size, used, first_safe_char,
 * bound, eol_pos, eolx) are (re)computed afterwards, and last_error is
 * reset on entry.
 *
 * NOTE(review): sep, quote and eol are copied into arrays of MAX_ATTR_LEN
 * bytes without a length check here; this presumably relies on the .pm
 * having validated the lengths - confirm.
 */
static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself) {
    SV	       **svp;
    STRLEN	 len;
    char	*ptr;

    last_error = 0;

    if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
	byte *cache = (byte *)SvPVX (*svp);
	(void)memcpy (csv, cache, sizeof (csv_t));
	}
    else {
	SV *sv_cache;

	(void)memset (csv, 0, sizeof (csv_t)); /* Reset everything */

	csv->self  = self;
	csv->pself = pself;

	/* Separator: default ',', then sep_char, then the full sep string.
	 * sep_len stays 0 unless sep is longer than one byte */
	CH_SEP = ',';
	if ((svp = hv_fetchs (self, "sep_char",       FALSE)) && *svp && SvOK (*svp))
	    CH_SEP = *SvPV (*svp, len);
	if ((svp = hv_fetchs (self, "sep",            FALSE)) && *svp && SvOK (*svp)) {
	    ptr = SvPV (*svp, len);
	    (void)memcpy (csv->sep, ptr, len);
	    if (len > 1)
		csv->sep_len = len;
	    }

	/* Quote: default '"'; an undefined or empty quote_char stores NUL */
	CH_QUOTE = '"';
	if ((svp = hv_fetchs (self, "quote_char",     FALSE)) && *svp) {
	    if (SvOK (*svp)) {
		ptr = SvPV (*svp, len);
		CH_QUOTE = len ? *ptr : (char)0;
		}
	    else
		CH_QUOTE = (char)0;
	    }
	if ((svp = hv_fetchs (self, "quote",          FALSE)) && *svp && SvOK (*svp)) {
	    ptr = SvPV (*svp, len);
	    (void)memcpy (csv->quo, ptr, len);
	    if (len > 1)
		csv->quo_len = len;
	    }

	/* Escape: default '"'; an undefined or empty escape_char stores NUL */
	csv->escape_char = '"';
	if ((svp = hv_fetchs (self, "escape_char",    FALSE)) && *svp) {
	    if (SvOK (*svp)) {
		ptr = SvPV (*svp, len);
		csv->escape_char = len ? *ptr : (char)0;
		}
	    else
		csv->escape_char = (char)0;
	    }

	if ((svp = hv_fetchs (self, "eol",            FALSE)) && *svp && SvOK (*svp)) {
	    char *eol = SvPV (*svp, len);
	    (void)memcpy (csv->eol, eol, len);
	    csv->eol_len = len;
	    if (len == 1 && *csv->eol == CH_CR)
		csv->eol_is_cr = 1;
	    }

	/* undef_str, comment_str and types keep a pointer into the string
	 * buffer of the hash value; nothing is copied */
	csv->undef_flg = 0;
	if ((svp = hv_fetchs (self, "undef_str",      FALSE)) && *svp && SvOK (*svp)) {
		/*if (sv && (SvOK (sv) || (
			(SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv))))) {*/
	    csv->undef_str = (byte *)SvPV_nolen (*svp);
	    if (SvUTF8 (*svp))
		csv->undef_flg = 3;
	    }
	else
	    csv->undef_str = NULL;

	if ((svp = hv_fetchs (self, "comment_str",    FALSE)) && *svp && SvOK (*svp))
	    csv->comment_str = (byte *)SvPV_nolen (*svp);
	else
	    csv->comment_str = NULL;

	if ((svp = hv_fetchs (self, "_types",         FALSE)) && *svp && SvOK (*svp)) {
	    csv->types = SvPV (*svp, len);
	    csv->types_len = len;
	    }

	if ((svp = hv_fetchs (self, "_is_bound",      FALSE)) && *svp && SvOK (*svp))
	    csv->is_bound = SvIV (*svp);
	/* Remember which callbacks are present as HOOK_* bits */
	if ((svp = hv_fetchs (self, "callbacks",      FALSE)) && _is_hashref (*svp)) {
	    HV *cb = (HV *)SvRV (*svp);
	    if ((svp = hv_fetchs (cb, "after_parse",  FALSE)) && _is_coderef (*svp))
		csv->has_hooks |= HOOK_AFTER_PARSE;
	    if ((svp = hv_fetchs (cb, "before_print", FALSE)) && _is_coderef (*svp))
		csv->has_hooks |= HOOK_BEFORE_PRINT;
	    }

	/* Boolean attributes; quote_space, escape_null and quote_binary
	 * default to true when absent, all others to false */
	csv->binary			= bool_opt ("binary");
	csv->decode_utf8		= bool_opt ("decode_utf8");
	csv->always_quote		= bool_opt ("always_quote");
	csv->strict			= bool_opt ("strict");
	csv->quote_empty		= bool_opt ("quote_empty");
	csv->quote_space		= bool_opt_def ("quote_space",  1);
	csv->escape_null		= bool_opt_def ("escape_null",  1);
	csv->quote_binary		= bool_opt_def ("quote_binary", 1);
	csv->allow_loose_quotes		= bool_opt ("allow_loose_quotes");
	csv->allow_loose_escapes	= bool_opt ("allow_loose_escapes");
	csv->allow_unquoted_escape	= bool_opt ("allow_unquoted_escape");
	csv->allow_whitespace		= bool_opt ("allow_whitespace");
	csv->blank_is_undef		= bool_opt ("blank_is_undef");
	csv->empty_is_undef		= bool_opt ("empty_is_undef");
	csv->verbatim			= bool_opt ("verbatim");

	/* Numeric attributes, default 0 */
	csv->auto_diag			= num_opt ("auto_diag");
	csv->diag_verbose		= num_opt ("diag_verbose");
	csv->keep_meta_info		= num_opt ("keep_meta_info");
	csv->skip_empty_rows		= num_opt ("skip_empty_rows");
	csv->formula			= num_opt ("formula");

	/* Without an escape character, escape_null is switched off */
	unless (csv->escape_char) csv->escape_null = 0;

	/* Create the cache blob from the struct; csv->cache has to point
	 * at the blob's own buffer, hence the second copy */
	sv_cache = newSVpvn ((char *)csv, sizeof (csv_t));
	csv->cache = (byte *)SvPVX (sv_cache);
	SvREADONLY_on (sv_cache);

	(void)memcpy (csv->cache, csv, sizeof (csv_t));

	(void)hv_store (self, "_CACHE", 6, sv_cache, 0);
	}

    csv->utf8 = 0;
    csv->size = 0;
    csv->used = 0;

    /* This is EBCDIC-safe, as it is used after translation */
    csv->first_safe_char = csv->quote_space ? 0x21 : 0x20;

    /* is_bound only stays set when "_BOUND_COLUMNS" is an array ref */
    if (csv->is_bound) {
	if ((svp = hv_fetchs (self, "_BOUND_COLUMNS", FALSE)) && _is_arrayref (*svp))
	    csv->bound = *svp;
	else
	    csv->is_bound = 0;
	}

    csv->eol_pos = -1;
    /* eolx is 0 when eol is unset or is a single CR or NL without verbatim,
     * and 1 for any other eol */
    csv->eolx = csv->eol_len 
	? csv->verbatim || csv->eol_len >= 2
	    ? 1
	    : csv->eol[0] == CH_CR || csv->eol[0] == CH_NL
		? 0
		: 1
	: 0;
    /* A multi-byte sep or quote that is valid UTF-8 turns on utf8 */
    if (csv->sep_len > 1 && is_utf8_string ((U8 *)(csv->sep), csv->sep_len))
	csv->utf8 = 1;
    if (csv->quo_len > 1 && is_utf8_string ((U8 *)(csv->quo), csv->quo_len))
	csv->utf8 = 1;
    } /* SetupCsv */

#define Print(csv,dst)		cx_Print (aTHX_ csv, dst)
/* Flush the first csv->used bytes of csv->buffer to dst.
 *
 * With useIO set, the bytes are handed to the method held in m_print,
 * invoked on dst. Otherwise dst is taken to be a reference to a scalar and
 * the bytes are appended to that scalar.
 *
 * Returns the (integer) result of the print call in IO mode, TRUE in
 * string mode. A print call that returns a false value also records
 * error 2200.
 */
static int cx_Print (pTHX_ csv_t *csv, SV *dst) {
    int result;
    int keep = 0;

    if (csv->useIO) {
	SV *tmp = newSVpvn_flags (csv->buffer, csv->used, SVs_TEMP);
	dSP;
	PUSHMARK (sp);
	EXTEND (sp, 2);
	PUSHs ((dst));
	if (csv->utf8) {
	    STRLEN	 len;
	    char	*ptr;
	    int		 j;

	    /* Strip trailing bytes (at most 16) until what is left is valid
	     * UTF-8. The stripped bytes are not lost: they are moved to the
	     * start of the buffer and stay there for the next flush */
	    ptr = SvPV (tmp, len);
	    while (len > 0 && !is_utf8_sv (tmp) && keep < 16) {
		ptr[--len] = (char)0;
		SvCUR_set (tmp, len);
		keep++;
		}
	    for (j = 0; j < keep; j++)
		csv->buffer[j] = csv->buffer[csv->used - keep + j];
	    SvUTF8_on (tmp);
	    }
	PUSHs (tmp);
	PUTBACK;
	result = call_sv (m_print, G_METHOD);
	SPAGAIN;
	/* result is first the number of returned values, then the value */
	if (result) {
	    result = POPi;
	    unless (result)
		(void)SetDiag (csv, 2200);
	    }
	PUTBACK;
	}
    else {
	sv_catpvn (SvRV (dst), csv->buffer, csv->used);
	result = TRUE;
	}
    /* In string mode, flag the target as UTF-8 if its content is valid */
    if (csv->utf8 && !csv->useIO && csv->decode_utf8
		  && SvROK (dst) && is_utf8_sv (SvRV (dst)))
	SvUTF8_on (SvRV (dst));
    /* The buffer now only holds the bytes that were kept back (if any) */
    csv->used = keep;
    return result;
    } /* Print */

/* Append byte c to the output buffer of csv. When the buffer is full
 * (one byte short of its size) it is flushed with Print () first.
 * Beware: if that flush fails, the macro makes the *calling* function
 * return FALSE. It expands to a brace block, not to an expression. */
#define CSV_PUT(csv,dst,c) {				\
    if ((csv)->used == sizeof ((csv)->buffer) - 1) {	\
	unless (Print ((csv), (dst)))			\
	    return FALSE;				\
	}						\
    (csv)->buffer[(csv)->used++] = (c);			\
    }

#define bound_field(csv,i,keep)	cx_bound_field (aTHX_ csv, i, keep)
/* Return the scalar that is bound to column i, or NULL on failure.
 *
 *  i     index into the array of bound column references
 *  keep  true:  return the scalar untouched
 *        false: reset the scalar to the empty string first; a read-only
 *               scalar is refused in that case
 *
 * Failures set a diagnostic: 3006 when i is not below csv->is_bound, 3008
 * when the slot is missing, is not a reference, or (with keep false) refers
 * to a read-only scalar.
 */
static SV *cx_bound_field (pTHX_ csv_t *csv, SSize_t i, int keep) {
    SV *sv = csv->bound;

    /* fprintf (stderr, "# New bind %d/%d\n", i, csv->is_bound); */
    if (i >= csv->is_bound) {
	(void)SetDiag (csv, 3006);
	return (NULL);
	}

    if (sv && SvROK (sv)) {
	AV  *av  = (AV *)(SvRV (sv));
	/* av_fetch returns NULL for a slot that does not exist, e.g. when
	 * the array holds fewer entries than is_bound claims */
	SV **svp = av_fetch (av, i, FALSE);

	/* fprintf (stderr, "# Bind %d/%d/%d\n", i, csv->is_bound, av_len (av)); */
	sv = svp ? *svp : NULL;
	if (sv && SvROK (sv)) {
	    sv = SvRV (sv);
	    if (keep)
		return (sv);

	    unless (SvREADONLY (sv)) {
		SvSetEmpty (sv);
		return (sv);
		}
	    }
	}
    (void)SetDiag (csv, 3008);
    return (NULL);
    } /* bound_field */

#define was_quoted(mf,idx)	cx_was_quoted (aTHX_ mf, idx)
/* Return 1 when entry idx of the flags array mf exists, holds an integer
 * and has the CSV_FLAGS_QUO bit set; 0 in every other case.
 */
static int cx_was_quoted (pTHX_ AV *mf, int idx) {
    SV **flags = av_fetch (mf, idx, FALSE);

    unless (flags)
	return (0);
    unless (SvIOK (*flags))
	return (0);
    return ((SvIV (*flags) & CSV_FLAGS_QUO) ? 1 : 0);
    } /* was_quoted */

#define _formula(csv,sv,len,f) cx_formula (aTHX_ csv, sv, len, f)
/* Apply the configured formula policy (csv->formula) to field sv.
 *
 *  sv   the field that was found to hold a formula
 *  len  optional; if given it is set to 0 for the actions 4 and 5 and to
 *       the length of the resulting string for action 6
 *  f    field number, used in the warning of action 3; the column name is
 *       looked up at index f - 1 of "_COLUMN_NAMES"
 *
 * Actions:  1 die, 2 croak, 3 warn and return the field unchanged,
 *           4 replace by the empty string, 5 replace by undef (NULL is
 *           returned), 6 replace by the result of the "_FORMULA_CB" code
 *           ref, which is called with $_ set to the field.
 * Any other value returns NULL.
 */
static char *cx_formula (pTHX_ csv_t *csv, SV *sv, STRLEN *len, int f) {

    int fa = csv->formula;

    if (fa == 1) die   ("Formulas are forbidden\n");
    if (fa == 2) croak ("Formulas are forbidden\n");

    if (fa == 3) {
	char *ptr = SvPV_nolen (sv);
	char  rec[40];
	char  field[128];
	SV  **svp;

	if (csv->recno)
	    (void)my_snprintf (rec, sizeof (rec), " in record %lu", csv->recno + 1);
	else
	    *rec = (char)0;

	*field = (char)0;
	/* Only look up a column name for f >= 1: with f == 0 the index
	 * f - 1 would be negative, which av_fetch counts from the end of
	 * the array, so the name of the last column would be reported */
	if (f > 0 &&
		(svp = hv_fetchs (csv->self, "_COLUMN_NAMES", FALSE)) && _is_arrayref (*svp)) {
	    AV *avp = (AV *)SvRV (*svp);
	    if (avp && av_len (avp) >= (f - 1)) {
		SV **fnm = av_fetch (avp, f - 1, FALSE);
		if (fnm && *fnm && SvOK (*fnm))
		    /* the name is capped at 100 bytes to fit in field */
		    (void)my_snprintf (field, sizeof (field),
			" (column: '%.100s')", SvPV_nolen (*fnm));
		}
	    }

	warn ("Field %d%s%s contains formula '%s'\n", f, field, rec, ptr);
	return ptr;
	}

    if (len) *len = 0;

    if (fa == 4) {
	unless (SvREADONLY (sv)) SvSetEmpty (sv);
	return "";
	}

    if (fa == 5) {
	unless (SvREADONLY (sv)) SvSetUndef (sv);
	return NULL;
	}

    if (fa == 6) {
	int result;
	SV **svp = hv_fetchs (csv->self, "_FORMULA_CB", FALSE);
	if (svp && _is_coderef (*svp)) {
	    dSP;
	    ENTER;
	    SAVE_DEFSV; /* local $_ */
	    DEFSV = sv;
	    PUSHMARK (SP);
	    PUTBACK;
	    result = call_sv (*svp, G_SCALAR);
	    SPAGAIN;
	    /* the value returned by the callback replaces the field */
	    if (result)
		sv_setsv (sv, POPs);
	    PUTBACK;
	    LEAVE;
	    }
	/* without a usable callback the field is returned as it is */
	return len ? SvPV (sv, *len) : SvPV_nolen (sv);
	}

    /* So far undefined behavior */
    return NULL;
    } /* _formula */

/* Handle an empty row according to csv->skip_empty_rows (ser).
 *
 * This is a statement macro for use inside a parsing function: it reads
 * and writes names that must exist at the point of use (csv, c, sv,
 * seenSomething, waitingForField, CSV_GET, AV_PUSH) and it contains
 * `return` and `break` statements that act on the enclosing function/loop.
 *
 *  ser 3, 4  record error 2015 and die / croak
 *  ser 5     record error 2015 and return FALSE
 *  ser <= 2  reset fld_idx and fetch the next character; at EOF, or always
 *            for ser 2, the current field is freed and seenSomething is
 *            cleared, after which ser 2 returns FALSE and the others break
 *  ser 6     call the "_EMPTROW_CB" code ref with $_ set to the field; the
 *            callback is expected to return an array ref, whose elements
 *            are pushed with AV_PUSH before TRUE is returned. A missing
 *            callback or a non-array-ref result returns FALSE.
 *
 * NOTE(review): in the ser 6 branch the `unless (result)` path returns
 * without PUTBACK/LEAVE, and `n <= 0` returns TRUE without pushing anything
 * although av_len () yields the highest index (0 for one element) - confirm
 * both are intended.
 */
#define SkipEmptyRow	{\
    int ser = csv->skip_empty_rows;					\
									\
    if (ser == 3) { (void)SetDiag (csv, 2015); die   ("Empty row"); }	\
    if (ser == 4) { (void)SetDiag (csv, 2015); croak ("Empty row"); }	\
    if (ser == 5) { (void)SetDiag (csv, 2015); return FALSE;        }	\
									\
    if (ser <= 2) {	/* skip & eof */				\
	csv->fld_idx = 0;						\
	c = CSV_GET;							\
	if (c == EOF || ser == 2) {					\
	    sv_free (sv);						\
	    sv = NULL;							\
	    seenSomething = FALSE;						\
	    if (ser == 2) return FALSE;					\
	    break;							\
	    }								\
	}								\
									\
    if (ser == 6) {							\
	int  result, n, i;						\
	SV  *rv, **svp = hv_fetchs (csv->self, "_EMPTROW_CB", FALSE);	\
	AV  *avp;							\
	unless (svp && _is_coderef (*svp))				\
	    return FALSE; /* A callback is wanted, but none found */	\
									\
	dSP;								\
	ENTER;								\
	SAVE_DEFSV; /* local $_ */					\
	DEFSV = sv;							\
	PUSHMARK (SP);							\
	PUTBACK;							\
	result = call_sv (*svp, G_SCALAR);				\
	SPAGAIN;							\
	unless (result) {						\
	    /* A false return will stop the parsing */			\
	    sv_free (sv);						\
	    sv = NULL;							\
	    waitingForField = 0;					\
	    return FALSE;						\
	    }								\
									\
	PUTBACK;							\
	LEAVE;								\
									\
	rv = POPs;							\
	/* Result should be a ref to a list. */				\
	unless (_is_arrayref (rv))					\
	    return FALSE;						\
									\
	avp = (AV *)SvRV (rv);						\
									\
	unless (avp) return FALSE;					\
	n = av_len (avp);						\
	if (n <= 0)  return TRUE;					\
									\
	if (csv->is_bound && csv->is_bound < n)				\
	    n = csv->is_bound - 1;					\
									\
	for (i = 0; i <= n; i++) {					\
	    SV **svp = av_fetch (avp, i, FALSE);			\
	    sv = svp && *svp ? *svp : NULL;				\
	    if (sv) {							\
		SvREFCNT_inc (sv);					\
		/* upgrade IV to IVPV if needed */			\
		(void)SvPV_nolen (sv);					\
		}							\
	    AV_PUSH;							\
	    }								\
	return TRUE;							\
	}								\
    }

#define Combine(csv,dst,fields)	cx_Combine (aTHX_ csv, dst, fields)
/* Combine - serialize the fields in `fields` into one CSV record.
 *
 * csv     parser/writer state (separator, quote, escape, eol, flags)
 * dst     output target handed to CSV_PUT () and Print ()
 * fields  array of field values; when it is empty and columns are bound
 *         (csv->is_bound), the bound columns are written instead
 *
 * For every field: emit the separator (all bytes of a multi-byte one),
 * decide if the value must be quoted, write the value with escapes, and
 * close the quotes. Undefined fields are written as csv->undef_str when
 * that is set and as nothing otherwise. The record is finished with
 * csv->eol when eol_len is non-zero.
 *
 * Returns FALSE after setting diagnostic 2110 when a binary character is
 * found while csv->binary is off (the offending field is stored under
 * "_ERROR_INPUT"); otherwise returns TRUE, or the result of Print () if
 * there is still buffered output (csv->used).
 */
static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields) {
    SSize_t i, n;
    int     bound = 0;
    int     aq  = (int)csv->always_quote;
    int     qe  = (int)csv->quote_empty;
    int     kmi = (int)csv->keep_meta_info;
    AV     *qm  = NULL;

    n = (IV)av_len (fields);
    if (n < 0 && csv->is_bound) {
	n = csv->is_bound - 1;
	bound = 1;
	}

    /* keep_meta_info >= 10: reuse the quotation flags of the last parse,
     * but only if "_FFLAGS" holds an entry for every field to write */
    if (kmi >= 10) {
	SV **svp;
	if ((svp = hv_fetchs (csv->self, "_FFLAGS", FALSE)) && _is_arrayref (*svp)) {
	    AV *avp = (AV *)SvRV (*svp);
	    if (avp && av_len (avp) >= n)
		qm = avp;
	    }
	}

    for (i = 0; i <= n; i++) {
	SV     *sv;
	STRLEN  len = 0;
	char   *ptr = NULL;

	if (i > 0) {
	    CSV_PUT (csv, dst, CH_SEP);
	    if (csv->sep_len) {
		int x;
		for (x = 1; x < (int)csv->sep_len; x++)
		    CSV_PUT (csv, dst, csv->sep[x]);
		}
	    }

	if (bound)
	    sv = bound_field (csv, i, 1);
	else {
	    SV **svp = av_fetch (fields, i, FALSE);
	    sv = svp && *svp ? *svp : NULL;
	    }

	/* defined value, possibly only after fetching get-magic */
	if (sv && (SvOK (sv) || (
		(SvGMAGICAL (sv) && (mg_get (sv), 1) && SvOK (sv))))) {

	    int	    quoteMe;

	    ptr = SvPV (sv, len);

	    if (*ptr == '=' && csv->formula) {
		unless (ptr = _formula (csv, sv, &len, i))
		    continue;
		}
	    if (len == 0)
		quoteMe = aq ? 1 : qe ? 1 : qm ? was_quoted (qm, i) : 0;
	    else {

		if (SvUTF8 (sv))  {
		    csv->utf8   = 1;
		    csv->binary = 1;
		    }

		quoteMe = aq ? 1 : qm ? was_quoted (qm, i) : 0;

		/* Do we need quoting? We do quote, if the user requested
		 * (always_quote), if binary or blank characters are found
		 * and if the string contains quote or escape characters.
		 */
		if (!quoteMe &&
		   ( quoteMe = (!SvIOK (sv) && !SvNOK (sv) && CH_QUOTE))) {
		    char	*ptr2;
		    STRLEN	 l;

#if MAINT_DEBUG > 6
		    (void)fprintf (stderr, "# %04d Combine:\n", __LINE__);
		    sv_dump (sv);
#else
#if MAINT_DEBUG > 4
		    (void)fprintf (stderr, "# %04d Combine: '%s'\n", __LINE__, _pretty_sv (sv));
#endif
#endif
		    /* scan for the first character that forces quotation;
		     * l is still > 0 afterwards if one was found */
		    for (ptr2 = ptr, l = len; l; ++ptr2, --l) {
			byte c = *ptr2;
#ifdef IS_EBCDIC
			byte x = ebcdic2ascii[c];
#if MAINT_DEBUG > 4
			(void)fprintf (stderr, " %02x", x);
#endif
#else
			byte x = c;
#endif

			if ((CH_QUOTE          && c == CH_QUOTE)          ||
			    (CH_SEP            && c == CH_SEP)            ||
			    (csv->escape_char  && c == csv->escape_char)  ||
			    (csv->quote_binary ? (x >= 0x7f && x <= 0xa0) ||
						  x < csv->first_safe_char
					       :  c == CH_NL || c == CH_CR ||
						 (csv->quote_space && (
						  c == CH_SPACE || c == CH_TAB)))) {
			    /* Binary character */
			    break;
			    }
			}
#if defined(IS_EBCDIC) && MAINT_DEBUG > 4
		    (void)fprintf (stderr, "\n");
#endif
		    quoteMe = (l > 0);
		    }
		}
	    if (quoteMe) {
		CSV_PUT (csv, dst, CH_QUOTE);
		if (csv->quo_len) {
		    int x;
		    for (x = 1; x < (int)csv->quo_len; x++)
			CSV_PUT (csv, dst, csv->quo[x]);
		    }
		}
	    while (len-- > 0) {
		char	c = *ptr++;
		int	e = 0;

		if (!csv->binary && is_csv_binary (c)) {
		    SvREFCNT_inc (sv);
		    csv->has_error_input = 1;
		    unless (hv_store (csv->self, "_ERROR_INPUT", 12, sv, 0))
			SvREFCNT_dec (sv); /* uncoverable statement memory fail */
		    (void)SetDiag (csv, 2110);
		    return FALSE;
		    }
		/* e: this character has to be preceded by the escape char */
		if (CH_QUOTE && (byte)c == CH_QUOTE && (csv->quo_len == 0 ||
			 memcmp (ptr, csv->quo +1, csv->quo_len - 1) == 0))
		    e = 1;
		else
		if (c == csv->escape_char && csv->escape_char)
		    e = 1;
		else
		if (c == (char)0          && csv->escape_null) {
		    e = 1;
		    c = '0';
		    }
		if (e && csv->escape_char)
		    CSV_PUT (csv, dst, csv->escape_char);
		CSV_PUT (csv, dst, c);
		}
	    if (quoteMe) {
		CSV_PUT (csv, dst, CH_QUOTE);
		if (csv->quo_len) {
		    int x;
		    for (x = 1; x < (int)csv->quo_len; x++)
			CSV_PUT (csv, dst, csv->quo[x]);
		    }
		}
	    }
	else {
	    if (csv->undef_str) {
		byte  *ptr = csv->undef_str;
		STRLEN len = strlen ((char *)ptr);

		if (csv->undef_flg) {
		    csv->utf8   = 1;
		    csv->binary = 1;
		    }

		while (len--)
		    CSV_PUT (csv, dst, *ptr++);
		}
	    }
	}
    if (csv->eol_len) {
	STRLEN	len = csv->eol_len;
	byte   *ptr = csv->eol;

	while (len--)
	    CSV_PUT (csv, dst, *ptr++);
	}
    if (csv->used)
	return Print (csv, dst);
    return TRUE;
    } /* Combine */

#define ParseError(csv,xse,pos)	cx_ParseError (aTHX_ csv, xse, pos)
/* Register a parse failure on the object behind csv->self: store the
 * position and field index of the error, keep the line that was being
 * parsed (csv->tmp, if any) as "_ERROR_INPUT" and set diagnostic `xse`.
 */
static void cx_ParseError (pTHX_ csv_t *csv, int xse, STRLEN pos) {
    SV *input = csv->tmp;

    (void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (pos), 0);
    (void)hv_store (csv->self, "_ERROR_FLD", 10, newSViv (csv->fld_idx), 0);

    if (input != NULL) {
	csv->has_error_input = 1;
	/* the hash gets its own reference, but only when the store worked */
	if (hv_store (csv->self, "_ERROR_INPUT", 12, input, 0) != NULL)
	    SvREFCNT_inc (input);
	}

    (void)SetDiag (csv, xse);
    } /* ParseError */

#define CsvGet(csv,src)		cx_CsvGet (aTHX_ csv, src)
/* CsvGet - refill the input buffer and return its first character.
 *
 * Only meaningful when reading from a stream (csv->useIO); otherwise EOF
 * is returned right away.
 *
 * If the previous line had its EOL stripped (csv->eol_pos >= 0), that EOL
 * is delivered first: the buffer is replaced by csv->eol and the pseudo
 * character CH_EOLX is returned.
 *
 * Otherwise the method named by m_getline is called on `src` and the
 * returned line becomes the new buffer (csv->tmp, bptr, size, used = 0).
 * With csv->eolx set, a trailing csv->eol is cut off the line; its position
 * is remembered in csv->eol_pos unless csv->verbatim is set, and CH_EOLX is
 * returned immediately when nothing but the EOL was read.
 *
 * Returns the next byte, CH_EOLX, or EOF (also setting useIO_EOF in
 * csv->useIO) when no data is available.
 */
static int cx_CsvGet (pTHX_ csv_t *csv, SV *src) {
    unless (csv->useIO)
	return EOF;

    if (csv->tmp && csv->eol_pos >= 0) {
	csv->eol_pos = -2;
	sv_setpvn (csv->tmp, (char *)csv->eol, csv->eol_len);
	csv->bptr = SvPV (csv->tmp, csv->size);
	csv->used = 0;
	return CH_EOLX;
	}

    {	STRLEN		result;
	dSP;

	PUSHMARK (sp);
	EXTEND (sp, 1);
	PUSHs (src);
	PUTBACK;
	result = call_sv (m_getline, G_METHOD);
	SPAGAIN;
	csv->eol_pos = -1;
	csv->tmp = result ? POPs : NULL;
	PUTBACK;

#if MAINT_DEBUG > 6
	(void)fprintf (stderr, "# %04d getline () returned:\n", __LINE__);
	sv_dump (csv->tmp);
#else
#if MAINT_DEBUG > 4
	(void)fprintf (stderr, "# %04d getline () returned: '%s'\n", __LINE__, _pretty_sv (csv->tmp));
#endif
#endif
	}
    if (csv->tmp && SvOK (csv->tmp)) {
	STRLEN tmp_len;
	csv->bptr = SvPV (csv->tmp, tmp_len);
	csv->used = 0;
	csv->size = tmp_len;
	if (csv->eolx && csv->size >= csv->eol_len) {
	    /* compare the tail of the line with eol, last byte first */
	    int i, match = 1;
	    for (i = 1; i <= (int)csv->eol_len; i++) {
		unless (csv->bptr[csv->size - i] == csv->eol[csv->eol_len - i]) {
		    match = 0;
		    break;
		    }
		}
	    if (match) {
#if MAINT_DEBUG > 4
		(void)fprintf (stderr, "# %04d EOLX match, size: %d\n", __LINE__, csv->size);
#endif
		csv->size -= csv->eol_len;
		unless (csv->verbatim)
		    csv->eol_pos = csv->size;
		csv->bptr[csv->size] = (char)0;
		SvCUR_set (csv->tmp, csv->size);
		unless (csv->verbatim || csv->size)
		    return CH_EOLX;
		}
	    }
	if (SvUTF8 (csv->tmp)) csv->utf8 = 1;
	/* tmp_len is the length before the EOL was stripped */
	if (tmp_len)
	    return ((byte)csv->bptr[csv->used++]);
	}
    csv->useIO |= useIO_EOF;
    return EOF;
    } /* CsvGet */

/* Abort the parse from within Parse (): release the field under
 * construction (unless columns are bound), record diagnostic `diag_code`
 * at the position of the last character read, and return FALSE from the
 * enclosing function. Both macros expand to the same statements; the two
 * names only tell whether the error was met inside or outside quotes.
 * They use the locals `csv` and `sv` of the function they are expanded in.
 */
#define ERROR_INSIDE_QUOTES(diag_code) {	\
    unless (csv->is_bound) SvREFCNT_dec (sv);	\
    ParseError (csv, diag_code, csv->used - 1);	\
    return FALSE;				\
    }
#define ERROR_INSIDE_FIELD(diag_code) {		\
    unless (csv->is_bound) SvREFCNT_dec (sv);	\
    ParseError (csv, diag_code, csv->used - 1);	\
    return FALSE;				\
    }

/* Trace helpers for the CSV_PUT_SV and AV_PUSH macros. They print to stderr
 * when MAINT_DEBUG > 4 and expand to nothing otherwise. Every message starts
 * with the source line number, so each format needs __LINE__ as argument.
 */
#if MAINT_DEBUG > 4
#define PUT_RPT       (void)fprintf (stderr, "# %04d CSV_PUT: 0x%02x '%c'\n", __LINE__, c, isprint (c) ? c : '?')
#define PUT_SEPX_RPT1 (void)fprintf (stderr, "# %04d PUT SEPX\n", __LINE__)
#define PUT_SEPX_RPT2 (void)fprintf (stderr, "# %04d Done putting SEPX\n", __LINE__)
#define PUT_QUOX_RPT1 (void)fprintf (stderr, "# %04d PUT QUOX\n", __LINE__)
#define PUT_QUOX_RPT2 (void)fprintf (stderr, "# %04d Done putting QUOX\n", __LINE__)
#define PUT_EOLX_RPT1 (void)fprintf (stderr, "# %04d PUT EOLX\n", __LINE__)
#define PUT_EOLX_RPT2 (void)fprintf (stderr, "# %04d Done putting EOLX\n", __LINE__)
#if MAINT_DEBUG > 6
#define PUSH_RPT      (void)fprintf (stderr, "# %04d AV_PUSHd\n", __LINE__); sv_dump (sv)
#else
#define PUSH_RPT      (void)fprintf (stderr, "# %04d AV_PUSHd '%s'\n", __LINE__, _pretty_sv (sv))
#endif
#else
#define PUT_RPT
#define PUT_SEPX_RPT1
#define PUT_SEPX_RPT2
#define PUT_QUOX_RPT1
#define PUT_QUOX_RPT2
#define PUT_EOLX_RPT1
#define PUT_EOLX_RPT2
#define PUSH_RPT
#endif
/* CSV_PUT_SV1 (c) - append the single byte `c` to the field under
 * construction. Uses the locals `sv` and `len` of the expanding function;
 * the buffer is grown to hold the new byte plus one more.
 *
 * CSV_PUT_SV (c) - like CSV_PUT_SV1, but the pseudo characters CH_EOLX,
 * CH_SEPX and CH_QUOTEX are expanded to the full csv->eol, csv->sep and
 * csv->quo strings. For CH_EOLX, csv->size is set to 0 when the buffer
 * currently holds the re-delivered EOL (eol_pos == -2) and eol_pos is
 * reset to -1.
 *
 * Both are bare `{ ... }` blocks and several call sites rely on that by
 * omitting the trailing semicolon, also directly in front of `else`.
 */
#define CSV_PUT_SV1(c) {			\
    len = SvCUR ((sv));				\
    SvGROW ((sv), len + 2);			\
    *SvEND ((sv)) = c;				\
    PUT_RPT;					\
    SvCUR_set ((sv), len + 1);			\
    }
#define CSV_PUT_SV(c) {				\
    if (c == CH_EOLX) {				\
	int x; PUT_EOLX_RPT1;			\
	if (csv->eol_pos == -2)			\
	    csv->size = 0;			\
	for (x = 0; x < (int)csv->eol_len; x++)	\
	    CSV_PUT_SV1 (csv->eol[x]);		\
	csv->eol_pos = -1;			\
	PUT_EOLX_RPT2;				\
	}					\
    else if (c == CH_SEPX) {			\
	int x; PUT_SEPX_RPT1;			\
	for (x = 0; x < (int)csv->sep_len; x++)	\
	    CSV_PUT_SV1 (csv->sep[x]);		\
	PUT_SEPX_RPT2;				\
	}					\
    else if (c == CH_QUOTEX) {			\
	int x; PUT_QUOX_RPT1;			\
	for (x = 0; x < (int)csv->quo_len; x++)	\
	    CSV_PUT_SV1 (csv->quo[x]);		\
	PUT_QUOX_RPT2;				\
	}					\
    else					\
	CSV_PUT_SV1 (c);			\
    }

/* CSV_GET1 - next input character: taken from the current buffer while
 * csv->used < csv->size, otherwise obtained through CsvGet (), which
 * refills the buffer. Uses the locals `csv` and `src`.
 */
#define CSV_GET1 \
    (csv->used < csv->size ? (byte)csv->bptr[csv->used++] : CsvGet (csv, src))

/* CSV_GET is what the parser uses. With MAINT_DEBUG > 3 it goes through
 * CSV_GET_ (), which reports the buffer state and the character read on
 * stderr together with the calling line; otherwise it is plain CSV_GET1.
 */
#if MAINT_DEBUG > 3
int CSV_GET_ (pTHX_ csv_t *csv, SV *src, int l) {
    int c;
    (void)fprintf (stderr, "# %04d 1-CSV_GET: (used: %d, size: %d, eol_pos: %d, eolx = %d)\n", l, csv->used, csv->size, csv->eol_pos, csv->eolx);
    c = CSV_GET1;
    (void)fprintf (stderr, "# %04d 2-CSV_GET: 0x%02x '%c'\n", l, c, isprint (c) ? c : '?');
    return (c);
    } /* CSV_GET_ */
#define CSV_GET CSV_GET_ (aTHX_ csv, src, __LINE__)
#else
#define CSV_GET CSV_GET1
#endif

/* AV_PUSH - finish the field under construction and hand it over.
 *
 * Expanded inside Parse (); uses its locals csv, sv, f, fnum, fields,
 * fflags and waitingForField. In order:
 *  - NUL-terminate the value and clear its UTF-8 flag
 *  - pass a non-empty value starting with '=' to _formula () when
 *    csv->formula is set
 *  - turn an empty value into undef when empty_is_undef is set, or when
 *    blank_is_undef is set and the field was not quoted
 *  - otherwise strip trailing whitespace from unquoted fields (with
 *    allow_whitespace) and switch the UTF-8 flag on when the field was
 *    flagged binary, decode_utf8 is set, and either the input is UTF-8
 *    or the value is valid UTF-8
 *  - push the value onto `fields` unless columns are bound
 *  - push the field flags onto `fflags` when meta info is kept
 *  - reset `sv` to NULL and `waitingForField` to 1
 */
#define AV_PUSH { \
    int svc;								\
    *SvEND (sv) = (char)0;						\
    svc = SvCUR (sv);							\
    SvUTF8_off (sv);							\
    if (svc && csv->formula && *(SvPV_nolen (sv)) == '=')		\
	(void)_formula (csv, sv, NULL, fnum);				\
    if (svc == 0 && (							\
	    csv->empty_is_undef ||					\
	    (!(f & CSV_FLAGS_QUO) && csv->blank_is_undef)))		\
	SvSetUndef (sv);						\
    else {								\
	if (csv->allow_whitespace && ! (f & CSV_FLAGS_QUO))		\
	    strip_trail_whitespace (sv);				\
	if (f & CSV_FLAGS_BIN && csv->decode_utf8			\
			      && (csv->utf8 || is_utf8_sv (sv)))	\
	    SvUTF8_on (sv);						\
	}								\
    SvSETMAGIC (sv);							\
    unless (csv->is_bound) av_push (fields, sv);			\
    PUSH_RPT;								\
    sv = NULL;								\
    if (csv->keep_meta_info && fflags)					\
	av_push (fflags, newSViv (f));					\
    waitingForField = 1;						\
    }

#define strip_trail_whitespace(sv)	cx_strip_trail_whitespace (aTHX_ sv)
/* Remove trailing spaces and tabs from the string value of `sv` in place
 * and shorten its length accordingly. A value that consists of whitespace
 * only ends up as the empty string.
 */
static void cx_strip_trail_whitespace (pTHX_ SV *sv) {
    STRLEN len;
    char   *s = SvPV (sv, len);
    unless (s && len) return;
    /* stop at the start of the buffer: len is unsigned, so without the
     * guard an all-whitespace value would make the loop read s[-1] */
    while (len > 0 && (s[len - 1] == CH_SPACE || s[len - 1] == CH_TAB))
	s[--len] = (char)0;
    SvCUR_set (sv, len);
    } /* strip_trail_whitespace */

/* NewField - make sure there is a field under construction.
 *
 * When `sv` is NULL a new one is obtained: the bound column for index
 * `fnum` if columns are bound, a fresh empty string otherwise. `fnum` is
 * incremented, the field flags `f` are cleared and csv->fld_idx is bumped.
 * If no value could be obtained the enclosing function returns FALSE.
 * Expands to a bare `unless` statement using the locals of Parse ().
 */
#define NewField				\
    unless (sv) {				\
	if (csv->is_bound)			\
	    sv = bound_field (csv, fnum, 0);	\
	else					\
	    sv = newSVpvs ("");			\
	fnum++;					\
	unless (sv) return FALSE;		\
	f = 0; csv->fld_idx++;			\
	}

#if MAINT_DEBUG
/* Debug only: the first characters seen by the current Parse () call */
static char str_parsed[40];
#endif

#if MAINT_DEBUG > 1
/* Debug only: printable form of the active separator. A multi-byte
 * separator is shown as a list of hex bytes, a single-byte one as the
 * character plus its hex value. The result lives in a static buffer that
 * is overwritten by the next call.
 */
static char _sep[64];
static char *_sep_string (csv_t *csv) {
    if (csv->sep_len) {
	int x;
	/* every byte takes three characters ("xx "), so entry x starts at
	 * offset 3 * x; MAX_ATTR_LEN entries plus the NUL fit in _sep */
	for (x = 0; x < csv->sep_len; x++)
	    (void)sprintf (_sep + x * 3, "%02x ", csv->sep[x]);
	}
    else
	(void)sprintf (_sep, "'%c' (0x%02x)", CH_SEP, CH_SEP);
    return _sep;
    } /* _sep_string */
#endif

#define Parse(csv,src,fields,fflags)	cx_Parse (aTHX_ csv, src, fields, fflags)
/* Parse - read one CSV record and split it into fields.
 *
 * csv     parser state; the input buffer (bptr/used/size) is consumed
 *         through CSV_GET, which refills it from `src` in IO mode
 * src     the stream object that CsvGet () reads lines from
 * fields  array receiving the parsed fields (not used for bound columns)
 * fflags  optional array receiving the per-field flags (CSV_FLAGS_*) when
 *         csv->keep_meta_info is set
 *
 * The function is a character driven state machine. Its state is
 *   waitingForField  no character of the current field was seen yet
 *   f                flags of the current field (quoted, binary, ...)
 *   sv               value of the current field under construction
 *   fnum             number of fields started so far
 *   spl              zero based count of characters read in this call
 *   seenSomething    whether any input was consumed for this record
 * and every character is dispatched on its class: separator, quote,
 * escape, newline / EOL, carriage return, or anything else. `restart`
 * re-dispatches a character that was fetched ahead; `EOLX` is entered
 * for all end-of-line variants.
 *
 * Returns TRUE when a record was completed and FALSE on a parse error
 * (after ParseError () / SetDiag ()), when SkipEmptyRow decides to stop,
 * or at end of input in IO mode when nothing was read (diagnostic 2012).
 */
static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags) {
    int		 c, f = 0;
    int		 waitingForField	= 1;
    SV		*sv			= NULL;
    STRLEN	 len;
    int		 seenSomething		= FALSE;
    int		 fnum			= 0;
    int		 spl			= -1;
#if MAINT_DEBUG
    (void)memset (str_parsed, 0, 40);
#endif

    csv->fld_idx = 0;

    while ((c = CSV_GET) != EOF) {

	NewField;

	seenSomething = TRUE;
	spl++;
#if MAINT_DEBUG
	if (spl < 39) str_parsed[spl] = c;
#endif
restart:
#if MAINT_DEBUG > 9
	(void)fprintf (stderr, "# %04d at restart: %d/%d/%03x pos %d = 0x%02x\n",
	    __LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
#endif
	if (is_SEP (c)) {
#if MAINT_DEBUG > 1
	    (void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = SEP %s\t%s\n",
		__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
		_sep_string (csv), _pretty_strl (csv->bptr + csv->used));
#endif
	    if (waitingForField) {
		/* ,1,"foo, 3",,bar,
		 * ^           ^
		 */
		if (csv->blank_is_undef || csv->empty_is_undef)
		    SvSetUndef (sv);
		else
		    SvSetEmpty (sv);
		unless (csv->is_bound)
		    av_push (fields, sv);
		sv = NULL;
		if (csv->keep_meta_info && fflags)
		    av_push (fflags, newSViv (f));
		}
	    else
	    if (f & CSV_FLAGS_QUO) {
		/* ,1,"foo, 3",,bar,
		 *        ^
		 */
		CSV_PUT_SV (c)
		}
	    else {
		/* ,1,"foo, 3",,bar,
		 *   ^        ^    ^
		 */
		AV_PUSH;
		}
	    } /* SEP char */
	else
	if (is_QUOTE (c)) {
#if MAINT_DEBUG > 1
	    (void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = QUO '%c'\t\t%s\n",
		__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
		_pretty_strl (csv->bptr + csv->used));
#endif
	    if (waitingForField) {
		/* ,1,"foo, 3",,bar,\r\n
		 *    ^
		 */
		f |= CSV_FLAGS_QUO;
		waitingForField = 0;
		continue;
		}

	    if (f & CSV_FLAGS_QUO) {

		/* ,1,"foo, 3",,bar,\r\n
		 *           ^
		 */

		int quoesc = 0;
		int c2 = CSV_GET;

		if (csv->allow_whitespace) {
		    /* , 1 , "foo, 3" , , bar , \r\n
		     *               ^
		     */
		    while (is_whitespace (c2)) {
			if (csv->allow_loose_quotes &&
				!(csv->escape_char && c2 == csv->escape_char)) {
			    /* This feels like a brittle fix for RT115953, where
			     *  ["foo "bar" baz"] got parsed as [foo "bar"baz]
			     * when both allow_whitespace and allow_loose_quotes
			     * are true and escape does not equal quote
			     */
			    CSV_PUT_SV (c);
			    c = c2;
			    }
			c2 = CSV_GET;
			}
		    }

		if (is_SEP (c2)) {
		    /* ,1,"foo, 3",,bar,\r\n
		     *            ^
		     */
		    AV_PUSH;
		    continue;
		    }

		if (c2 == CH_NL || c2 == CH_EOLX) {
		    /* ,1,"foo, 3",,"bar"\n
		     *                   ^
		     */
		    AV_PUSH;
		    return TRUE;
		    }

		/* ---
		 * if      QUOTE eq ESCAPE
		 *    AND (    c2  eq QUOTE	1,"abc""def",2
		 *         OR  c2  eq ESCAPE	1,"abc""def",2 (QUO eq ESC)
		 *         OR  c2  eq NULL )	1,"abc"0def",2
		 * ---
		 */
		if (csv->escape_char && c == csv->escape_char) {

		    quoesc = 1;
		    if (c2 == '0') {
			/* ,1,"foo, 3"056",,bar,\r\n
			 *            ^
			 */
			CSV_PUT_SV (0)
			continue;
			}

		    if (is_QUOTE (c2)) {
			/* ,1,"foo, 3""56",,bar,\r\n
			 *            ^
			 */
			if (csv->utf8)
			    f |= CSV_FLAGS_BIN;
			CSV_PUT_SV (c2)
			continue;
			}

		    if (csv->allow_loose_escapes && c2 != CH_CR) {
			/* ,1,"foo, 3"56",,bar,\r\n
			 *            ^
			 */
			CSV_PUT_SV (c);
			c = c2;
			goto restart;
			}
		    }

		if (c2 == CH_CR) {
		    int	c3;

		    if (csv->eol_is_cr) {
			/* ,1,"foo, 3"\r
			 *            ^
			 */
			AV_PUSH;
			return TRUE;
			}

		    c3 = CSV_GET;

		    if (c3 == CH_NL) { /* \r is not optional before EOLX! */
			/* ,1,"foo, 3"\r\n
			 *              ^
			 */
			AV_PUSH;
			return TRUE;
			}

		    if (csv->useIO && csv->eol_len == 0) {
			if (c3 == CH_CR) { /* \r followed by an empty line */
			    /* ,1,"foo, 3"\r\r
			     *              ^
			     */
			    set_eol_is_cr (csv);
			    goto EOLX;
			    }

			if (!is_csv_binary (c3)) {
			    /* ,1,"foo\n 3",,"bar"\r
			     * baz,4
			     * ^
			     */
			    set_eol_is_cr (csv);
			    csv->used--;
			    csv->has_ahead++;
			    AV_PUSH;
			    return TRUE;
			    }
			}

		    ParseError (csv, quoesc ? 2023 : 2010, csv->used - 2);
		    return FALSE;
		    }

		if (c2 == EOF) {
		    /* ,1,"foo, 3"
		     *            ^
		     */
		    AV_PUSH;
		    return TRUE;
		    }

		if (csv->allow_loose_quotes && !quoesc) {
		    /* ,1,"foo, 3"456",,bar,\r\n
		     *            ^
		     */
		    CSV_PUT_SV (c);
		    c = c2;
		    goto restart;
		    }

		/* 1,"foo" ",3
		 *        ^
		 */
		if (quoesc) {
		    csv->used--;
		    ERROR_INSIDE_QUOTES (2023);
		    }

		ERROR_INSIDE_QUOTES (2011);
		}

	    /* !waitingForField, !InsideQuotes */
	    if (csv->allow_loose_quotes) { /* 1,foo "boo" d'uh,1 */
		f |= CSV_FLAGS_EIF;	/* Mark as error-in-field */
		CSV_PUT_SV (c);
		}
	    else
		ERROR_INSIDE_FIELD (2034);
	    } /* QUO char */
	else
	if (c == csv->escape_char && csv->escape_char) {
#if MAINT_DEBUG > 1
	    (void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = ESC '%c'\t%s\n",
		__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
		_pretty_strl (csv->bptr + csv->used));
#endif
	    /* This means quote_char != escape_char */
	    if (waitingForField) {
		waitingForField = 0;
		if (csv->allow_unquoted_escape) {
		    /* The escape character is the first character of an
		     * unquoted field */
		    /* ... get and store next character */
		    int c2 = CSV_GET;

		    SvSetEmpty (sv);

		    if (c2 == EOF) {
			csv->used--;
			ERROR_INSIDE_FIELD (2035);
			}

		    if (c2 == '0')
			CSV_PUT_SV (0)
		    else
		    if ( is_QUOTE (c2) || is_SEP (c2) ||
			 c2 == csv->escape_char || csv->allow_loose_escapes) {
			if (csv->utf8)
			    f |= CSV_FLAGS_BIN;
			CSV_PUT_SV (c2)
			}
		    else {
			csv->used--;
			ERROR_INSIDE_QUOTES (2025);
			}
		    }
		}
	    else
	    if (f & CSV_FLAGS_QUO) {
		int c2 = CSV_GET;

		if (c2 == EOF) {
		    csv->used--;
		    ERROR_INSIDE_QUOTES (2024);
		    }

		if (c2 == '0')
		    CSV_PUT_SV (0)
		else
		if ( is_QUOTE (c2) || is_SEP (c2) ||
		     c2 == csv->escape_char || csv->allow_loose_escapes) {
		    if (csv->utf8)
			f |= CSV_FLAGS_BIN;
		    CSV_PUT_SV (c2)
		    }
		else {
		    csv->used--;
		    ERROR_INSIDE_QUOTES (2025);
		    }
		}
	    else
	    if (sv) {
		int c2 = CSV_GET;

		if (c2 == EOF) {
		    csv->used--;
		    ERROR_INSIDE_FIELD (2035);
		    }

		CSV_PUT_SV (c2);
		}
	    else
		ERROR_INSIDE_FIELD (2036); /* uncoverable statement I think there's no way to get here */
	    } /* ESC char */
	else
	if (c == CH_NL || is_EOL (c)) {
EOLX:
#if MAINT_DEBUG > 1
	    (void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = NL\t%s\n",
		__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl,
		_pretty_strl (csv->bptr + csv->used));
#endif
	    /* first field, no flags, no content: the row is empty */
	    if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
		SkipEmptyRow;
		goto restart;
		}

	    if (waitingForField) {
		/* ,1,"foo, 3",,bar,
		 *                  ^
		 */
		if (csv->blank_is_undef || csv->empty_is_undef)
		    SvSetUndef (sv);
		else
		    SvSetEmpty (sv);
		unless (csv->is_bound)
		    av_push (fields, sv);
		if (csv->keep_meta_info && fflags)
		    av_push (fflags, newSViv (f));
		return TRUE;
		}

	    if (f & CSV_FLAGS_QUO) {
		/* ,1,"foo\n 3",,bar,
		 *        ^
		 */
		f |= CSV_FLAGS_BIN;
		unless (csv->binary)
		    ERROR_INSIDE_QUOTES (2021);

		CSV_PUT_SV (c);
		}
	    else
	    if (csv->verbatim) {
		/* ,1,foo\n 3,,bar,
		 * This feature should be deprecated
		 */
		f |= CSV_FLAGS_BIN;
		unless (csv->binary)
		    ERROR_INSIDE_FIELD (2030);

		CSV_PUT_SV (c);
		}
	    else {
		/* sep=,
		 *      ^
		 */
		/* A first line of the form "sep=..." sets the separator
		 * for the rest of the stream; parsing then starts over
		 * with the next line */
		if (csv->recno == 0 && csv->fld_idx == 1 && csv->useIO &&
			(csv->bptr[0] == 's' || csv->bptr[0] == 'S') &&
			(csv->bptr[1] == 'e' || csv->bptr[1] == 'E') &&
			(csv->bptr[2] == 'p' || csv->bptr[2] == 'P') &&
			 csv->bptr[3] == '=') {
		    char *sep = csv->bptr + 4;
		    int   lnu = csv->used - 5;
		    if (lnu <= MAX_ATTR_LEN) {
			sep[lnu] = (char)0;
			(void)memcpy (csv->sep, sep, lnu);
			csv->sep_len = lnu == 1 ? 0 : lnu;
			return Parse (csv, src, fields, fflags);
			}
		    }

		/* ,1,"foo\n 3",,bar
		 *                  ^
		 */
		AV_PUSH;
		return TRUE;
		}
	    } /* CH_NL */
	else
	if (c == CH_CR && !(csv->verbatim)) {
#if MAINT_DEBUG > 1
	    (void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = CR\n",
		__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl);
#endif
	    if (waitingForField) {
		int	c2;

		if (csv->eol_is_cr) {
		    /* ,1,"foo\n 3",,bar,\r
		     *                   ^
		     */
		    c = CH_NL;
		    goto EOLX;
		    }

		c2 = CSV_GET;

		if (c2 == EOF) {
		    /* ,1,"foo\n 3",,bar,\r
		     *                     ^
		     */
		    c = EOF;

#if MAINT_DEBUG > 9
		    (void)fprintf (stderr, "# %04d (%d) ... CR EOF 0x%x\n",
			__LINE__, seenSomething, c);
#endif
		    unless (seenSomething)
			break;
		    goto restart;
		    }

		if (c2 == CH_NL) { /* \r is not optional before EOLX! */
		    /* ,1,"foo\n 3",,bar,\r\n
		     *                     ^
		     */
		    c = c2;
		    goto EOLX;
		    }

		if (csv->useIO && csv->eol_len == 0) {
		    if (c2 == CH_CR) { /* \r followed by an empty line */
			/* ,1,"foo\n 3",,bar,\r\r
			 *                     ^
			 */
			set_eol_is_cr (csv);
			goto EOLX;
			}

		    waitingForField = 0;

		    if (!is_csv_binary (c2)) {
			/* ,1,"foo\n 3",,bar,\r
			 * baz,4
			 * ^
			 */
			set_eol_is_cr (csv);
			csv->used--;
			csv->has_ahead++;
			if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
			    SkipEmptyRow;
			    goto restart;
			    }
			AV_PUSH;
			return TRUE;
			}
		    }

		/* ,1,"foo\n 3",,bar,\r\t
		 *                     ^
		 */
		csv->used--;
		ERROR_INSIDE_FIELD (2031);
		}

	    if (f & CSV_FLAGS_QUO) {
		/* ,1,"foo\r 3",,bar,\r\t
		 *        ^
		 */
		f |= CSV_FLAGS_BIN;
		unless (csv->binary)
		    ERROR_INSIDE_QUOTES (2022);

		CSV_PUT_SV (c);
		}
	    else {
		int	c2;

		if (csv->eol_is_cr) {
		    /* ,1,"foo\n 3",,bar\r
		     *                  ^
		     */
		    goto EOLX;
		    }

		c2 = CSV_GET;

		if (c2 == CH_NL) { /* \r is not optional before EOLX! */
		    /* ,1,"foo\n 3",,bar\r\n
		     *                    ^
		     */
		    goto EOLX;
		    }

		if (csv->useIO && csv->eol_len == 0) {
		    if (!is_csv_binary (c2)
			    /* ,1,"foo\n 3",,bar\r
			     * baz,4
			     * ^
			     */
			|| c2 == CH_CR) {
			    /* ,1,"foo\n 3",,bar,\r\r
			     *                     ^
			     */
			set_eol_is_cr (csv);
			csv->used--;
			csv->has_ahead++;
			if (fnum == 1 && f == 0 && SvCUR (sv) == 0 && csv->skip_empty_rows) {
			    SkipEmptyRow;
			    goto restart;
			    }
			AV_PUSH;
			return TRUE;
			}
		    }

		/* ,1,"foo\n 3",,bar\r\t
		 *                    ^
		 */
		ERROR_INSIDE_FIELD (2032);
		}
	    } /* CH_CR */
	else {
#if MAINT_DEBUG > 1
	    (void)fprintf (stderr, "# %04d %d/%d/%03x pos %d = CCC '%c'\t\t%s\n",
		__LINE__, waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c,
		_pretty_strl (csv->bptr + csv->used));
#endif
	    /* Needed for non-IO parse, where EOL is not set during read */
	    if (csv->eolx && c == CH_EOL &&
		 csv->size - csv->used >= (STRLEN)csv->eol_len - 1 &&
		 !memcmp (csv->bptr + csv->used, csv->eol + 1, csv->eol_len - 1) &&
		 (csv->used += csv->eol_len - 1)) {
		c = CH_EOLX;
#if MAINT_DEBUG > 5
		(void)fprintf (stderr, "# %04d -> EOLX (0x%x)\n", __LINE__, c);
#endif
		goto EOLX;
		}

	    if (waitingForField) {
		/* a comment line is only recognized at the very first
		 * character of the record (no flags, spl == 0) */
		if (csv->comment_str && !f && !spl && c == *csv->comment_str) {
		    STRLEN cl = strlen ((char *)csv->comment_str);

#if MAINT_DEBUG > 5
		    (void)fprintf (stderr,
			"# %04d COMMENT? cl = %d, size = %d, used = %d\n",
			__LINE__, cl, csv->size, csv->used);
#endif
		    if (cl == 1 || (
		       (csv->size - csv->used >= cl - 1 &&
			 !memcmp (csv->bptr + csv->used, csv->comment_str + 1, cl - 1) &&
			 (csv->used += cl - 1)))) {
			csv->used     = csv->size;
			csv->fld_idx  = csv->strict_n ? csv->strict_n - 1 : 0;
			c             = CSV_GET;
			seenSomething = FALSE;
#if MAINT_DEBUG > 5
			(void)fprintf (stderr, "# %04d COMMENT, SKIPPED\n", __LINE__);
#endif
			if (c == EOF)
			    break;
			goto restart;
			}
		    }

		if (csv->allow_whitespace && is_whitespace (c)) {
		    do {
			c = CSV_GET;
#if MAINT_DEBUG > 5
			(void)fprintf (stderr, "# %04d WS next got (0x%x)\n", __LINE__, c);
#endif
			} while (is_whitespace (c));
		    if (c == EOF)
			break;
		    goto restart;
		    }
		waitingForField = 0;
		goto restart;
		}

#if MAINT_DEBUG > 5
	    (void)fprintf (stderr, "# %04d %sc 0x%x is%s binary %s utf8\n",
		__LINE__, f & CSV_FLAGS_QUO ? "quoted " : "", c,
		is_csv_binary (c) ? "" : " not",
		csv->utf8 ? "is" : "not");
#endif
	    if (f & CSV_FLAGS_QUO) {
		if (is_csv_binary (c)) {
		    f |= CSV_FLAGS_BIN;
		    unless (csv->binary || csv->utf8)
			ERROR_INSIDE_QUOTES (2026);
		    }
		CSV_PUT_SV (c);
		}
	    else {
		if (is_csv_binary (c)) {
		    if (csv->useIO && c == EOF)
			break;
		    f |= CSV_FLAGS_BIN;
		    unless (csv->binary || csv->utf8)
			ERROR_INSIDE_FIELD (2037);
		    }
		CSV_PUT_SV (c);
		}
	    }

	/* continue */
	if (csv->verbatim && csv->useIO && csv->used == csv->size)
	    break;
	}

    /* End of input reached without an explicit end of record */
    if (waitingForField) {
	if (seenSomething || !csv->useIO) {
	    NewField;
	    if (csv->blank_is_undef || csv->empty_is_undef)
		SvSetUndef (sv);
	    else
		SvSetEmpty (sv);
	    unless (csv->is_bound)
		av_push (fields, sv);
	    if (csv->keep_meta_info && fflags)
		av_push (fflags, newSViv (f));
	    return TRUE;
	    }

	(void)SetDiag (csv, 2012);
	return FALSE;
	}

    /* input ended while still inside a quoted field */
    if (f & CSV_FLAGS_QUO)
	ERROR_INSIDE_QUOTES (2027);

    if (sv) {
	AV_PUSH;
	}
    else if (f == 0 && fnum == 1 && csv->skip_empty_rows == 1)
	return FALSE;
    return TRUE;
    } /* Parse */

/* Run the user callback registered as `cb_name` in the "callbacks" hash of
 * the object `hv`. The callback receives a reference to the object and a
 * reference to the row `av` and is called in scalar context.
 *
 * Returns 0 when no such callback is registered or when the callback
 * returned a reference to the string "skip"; otherwise the number of
 * values call_sv () reported.
 */
static int hook (pTHX_ HV *hv, char *cb_name, AV *av) {
    SV **entry;
    HV  *callbacks;
    int  count;

#if MAINT_DEBUG > 1
    (void)fprintf (stderr, "# %04d HOOK %s %x\n", __LINE__, cb_name, av);
#endif
    entry = hv_fetchs (hv, "callbacks", FALSE);
    if (!entry || !_is_hashref (*entry))
	return 0; /* uncoverable statement defensive programming */

    callbacks = (HV *)SvRV (*entry);
    entry     = hv_fetch (callbacks, cb_name, strlen (cb_name), FALSE);
    if (!entry || !_is_coderef (*entry))
	return 0;

    {   dSP;
	ENTER;
	SAVETMPS;
	PUSHMARK (SP);
	mXPUSHs (newRV_inc ((SV *)hv));
	mXPUSHs (newRV_inc ((SV *)av));
	PUTBACK;
	count = call_sv (*entry, G_SCALAR);
	SPAGAIN;
	if (count) {
	    SV *ret = POPs;
	    /* \"skip" asks the caller to drop this row */
	    if (SvROK (ret)) {
		SV *target = SvRV (ret);
		if (target && SvPOK (target) &&
			strcmp (SvPV_nolen (target), "skip") == 0)
		    count = 0;
		}
	    }
	PUTBACK;
	FREETMPS;
	LEAVE;
	}
    return count;
    } /* hook */

#define c_xsParse(csv,hv,av,avf,src,useIO)	cx_c_xsParse (aTHX_ csv, hv, av, avf, src, useIO)
/* c_xsParse - parse one record and update the object state around it.
 *
 * csv    parser state, passed BY VALUE: changes made here reach the caller
 *        only through the cache buffer (csv.cache) and the object hash
 * hv     the object hash; receives _RECNO, _EOF, _AHEAD, _FFLAGS and
 *        _ERROR_INPUT
 * av     array receiving the parsed fields
 * avf    array receiving the field flags, may be NULL
 * src    string to parse, or the stream object when useIO is true
 * useIO  true to read from a stream through its getline method
 *
 * Steps: localize $/ to the EOL in use when that is needed, set up the
 * input buffer (in IO mode picking up what a previous call left in
 * "_AHEAD"), run Parse (), enforce the `strict` field count (diagnostic
 * 2014), store or drop the read-ahead data, copy the state back into the
 * cache, and finally coerce fields to IV/NV according to csv.types.
 *
 * Returns the result of Parse (), or FALSE when the strict check failed
 * and an error is still set.
 *
 * NOTE(review): `pos` is initialized to NULL and never assigned, so the
 * `if (pos && ...)` adjustment below cannot run in this function.
 */
static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
    int	result, ahead = 0;
    SV	*pos = NULL;

    ENTER;
    if (csv.eolx || csv.eol_is_cr) {
	/* local $/ = $eol */
	SAVEGENERICSV (PL_rs);
	PL_rs = newSVpvn ((char *)csv.eol, csv.eol_len);
	}

    if ((csv.useIO = useIO)) {
	csv.tmp = NULL;

	if ((ahead = csv.has_ahead)) {
	    SV **svp;
	    if ((svp = hv_fetchs (hv, "_AHEAD", FALSE)) && *svp) {
		csv.bptr = SvPV (csv.tmp = *svp, csv.size);
		csv.used = 0;
		if (pos && SvIV (pos) > (IV)csv.size)
		    sv_setiv (pos, SvIV (pos) - csv.size);
		}
	    }
	}
    else {
	csv.tmp  = src;
	csv.utf8 = SvUTF8 (src) ? 1 : 0;
	csv.bptr = SvPV (src, csv.size);
	}
    if (csv.has_error_input) {
	(void)hv_store (hv, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
	csv.has_error_input = 0;
	}

    result = Parse (&csv, src, av, avf);
    (void)hv_store (hv, "_RECNO", 6, newSViv (++csv.recno), 0);
    (void)hv_store (hv, "_EOF",   4, &PL_sv_no,             0);

    if (csv.strict) {
	STRLEN nf = av_len (av);
#if MAINT_DEBUG > 6
	(void)fprintf (stderr, "# %04d Strict nf = %2d, n = %2d, idx = %2d, recno = %2d, res = %d\n",
	    __LINE__, nf, csv.strict_n, csv.fld_idx, csv.recno, result);
#endif

	/* the first record with fields defines the expected count */
	if (nf && !csv.strict_n) csv.strict_n = (short)nf;
	if (csv.strict_n > 0 && nf != csv.strict_n) {
	    unless (csv.useIO & useIO_EOF)
		ParseError (&csv, 2014, csv.used);
	    if (last_error) /* an error callback can reset and accept */
		result = FALSE;
	    }
	}

    if (csv.useIO) {
	/* keep what is left in the buffer for the next call */
	if (csv.tmp && csv.used < csv.size && csv.has_ahead) {
	    SV *sv = newSVpvn (csv.bptr + csv.used, csv.size - csv.used);
	    (void)hv_store  (hv, "_AHEAD", 6, sv, 0);
	    }
	else {
	    csv.has_ahead = 0;
	    if (csv.useIO & useIO_EOF)
		(void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
	    }
	/* csv.cache[CACHE_ID__has_ahead] = csv.has_ahead; */
	(void)memcpy (csv.cache, &csv, sizeof (csv_t));

	if (avf) {
	    if (csv.keep_meta_info)
		(void)hv_store  (hv, "_FFLAGS", 7, newRV_noinc ((SV *)avf), 0);
	    else {
		av_undef (avf);
		sv_free ((SV *)avf);
		}
	    }
	}
    else { /* just copy to the cache */
	SV **svp = hv_fetchs (hv, "_CACHE", FALSE);

	if (svp && *svp)
	    csv.cache = (byte *)SvPV_nolen (*svp);
	(void)memcpy (csv.cache, &csv, sizeof (csv_t));
	}

    /* apply the requested column types to the defined fields */
    if (result && csv.types) {
	STRLEN	i;
	STRLEN	len = av_len (av);
	SV    **svp;

	for (i = 0; i <= len && i <= csv.types_len; i++) {
	    if ((svp = av_fetch (av, i, FALSE)) && *svp && SvOK (*svp)) {
		switch (csv.types[i]) {
		    case CSV_XS_TYPE_IV:
#ifdef CSV_XS_TYPE_WARN
			sv_setiv (*svp, SvIV (*svp));
#else
			if (SvTRUE (*svp))
			    sv_setiv (*svp, SvIV (*svp));
			else
			    sv_setiv (*svp, 0);
#endif
			break;

		    case CSV_XS_TYPE_NV:
#ifdef CSV_XS_TYPE_WARN
			sv_setnv (*svp, SvNV (*svp));
#else
			if (SvTRUE (*svp))
			    sv_setnv (*svp, SvNV (*svp));
			else
			    sv_setnv (*svp, 0.0);
#endif
			break;

		    default:
			break;
		    }
		}
	    }
	}

    LEAVE;

    return result;
    } /* c_xsParse */

#define xsParse(self,hv,av,avf,src,useIO)	cx_xsParse (aTHX_ self, hv, av, avf, src, useIO)
/* Parse a single record for the object `self` / `hv` and run the
 * "after_parse" callback on success when one is registered.
 * Yields 1 when the parse succeeded or no error is pending, else 0.
 */
static int cx_xsParse (pTHX_ SV *self, HV *hv, AV *av, AV *avf, SV *src, bool useIO) {
    csv_t	csv;
    int		ok;

    SetupCsv (&csv, hv, self);
    ok = c_xsParse (csv, hv, av, avf, src, useIO);

    if (ok && (csv.has_hooks & HOOK_AFTER_PARSE))
	(void)hook (aTHX_ hv, "after_parse", av);

    if (ok)
	return 1;
    /* a failed parse still counts as success when the error was cleared */
    return last_error ? 0 : 1;
    } /* xsParse */

/* Drop all elements of `av` one by one, releasing each of them, so the
 * array itself can be re-used. The API offers av_clear and av_undef as
 * well, but they have more overhead. */
#define av_empty(av)	cx_av_empty (aTHX_ av)
static void cx_av_empty (pTHX_ AV *av) {
    for (;;) {
	if (av_len (av) < 0)
	    break;
	sv_free (av_pop (av));
	}
    } /* av_empty */

#define xsParse_all(self,hv,io,off,len)		cx_xsParse_all (aTHX_ self, hv, io, off, len)
/* xsParse_all - read all remaining records from the stream `io`.
 *
 * off  optional offset: a positive value skips that many records first, a
 *      negative value keeps only the last -off records
 * len  optional maximum number of records to return
 *
 * Each record is parsed into its own array; when an "after_parse" callback
 * is registered it is run on every row and may reject it. Returns a mortal
 * reference to the array of row references.
 */
static SV *cx_xsParse_all (pTHX_ SV *self, HV *hv, SV *io, SV *off, SV *len) {
    csv_t	csv;
    int		n = 0, skip = 0, length = MAXINT, tail = MAXINT;
    AV		*avr = newAV ();
    AV		*row = newAV ();

    SetupCsv (&csv, hv, self);

    if (SvOK (off)) {
	skip = SvIV (off);
	if (skip < 0) {
	    tail = -skip;
	    skip = -1;
	    }
	}
    if (SvOK (len))
	length = SvIV (len);

    while (c_xsParse (csv, hv, row, NULL, io, 1)) {

	/* c_xsParse works on a copy: reload the state it left behind */
	SetupCsv (&csv, hv, self);

	if (skip > 0) {
	    skip--;
	    av_empty (row); /* re-use */
	    continue;
	    }

	/* only the last `tail` rows are wanted: drop the oldest one */
	if (n++ >= tail) {
	    SvREFCNT_dec (av_shift (avr));
	    n--;
	    }

	if (csv.has_hooks & HOOK_AFTER_PARSE) {
	    unless (hook (aTHX_ hv, "after_parse", row)) {
		av_empty (row); /* re-use */
		continue;
		}
	    }
	av_push (avr, newRV_noinc ((SV *)row));
	row = NULL; /* ownership moved to avr */

	if (n >= length && skip >= 0)
	    break; /* We have enough */

	row = newAV ();
	}

    /* The scratch row of the last, unsuccessful parse (or one that was only
     * re-used) was never pushed onto the result: release it */
    if (row)
	SvREFCNT_dec ((SV *)row);

    while (n > length) {
	SvREFCNT_dec (av_pop (avr));
	n--;
	}

    return (SV *)sv_2mortal (newRV_noinc ((SV *)avr));
    } /* xsParse_all */

#define xsCombine(self,hv,av,io,useIO)	cx_xsCombine (aTHX_ self, hv, av, io, useIO)
/* Build one CSV record from `av` for the object `self` / `hv` and write it
 * to `io`. With useIO the "before_print" callback is run first when one is
 * registered; without it the result is upgraded to UTF-8 when the record
 * turned out to contain UTF-8 data. While an EOL is configured the output
 * record separator is switched off for the duration of the call.
 * Returns the result of Combine ().
 */
static int cx_xsCombine (pTHX_ SV *self, HV *hv, AV *av, SV *io, bool useIO) {
    csv_t	csv;
    int		ok;
#if (PERL_BCDVERSION >= 0x5008000)
    SV		*saved_ors = PL_ors_sv;
#endif

    SetupCsv (&csv, hv, self);
    csv.useIO = useIO;

#if (PERL_BCDVERSION >= 0x5008000)
    if (csv.eol[0])
	PL_ors_sv = NULL;
#endif

    if (useIO && (csv.has_hooks & HOOK_BEFORE_PRINT))
	(void)hook (aTHX_ hv, "before_print", av);

    ok = Combine (&csv, io, av);

#if (PERL_BCDVERSION >= 0x5008000)
    PL_ors_sv = saved_ors;
#endif

    if (!ok)
	return ok;
    if (!useIO && csv.utf8)
	sv_utf8_upgrade (io);
    return ok;
    } /* xsCombine */

MODULE = Text::CSV_XS		PACKAGE = Text::CSV_XS

PROTOTYPES: DISABLE

BOOT:
    /* Run once at bootstrap: keep the method names "getline" and "print"
     * as SVs in m_getline and m_print, and load IO::Handle without
     * calling its import (PERL_LOADMOD_NOIMPORT) */
    m_getline = newSVpvs ("getline");
    m_print   = newSVpvs ("print");
    Perl_load_module (aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs ("IO::Handle"), NULL, NULL, NULL);

void
SetDiag (SV *self, int xse, ...)

  PPCODE:
    /* Set diagnostic code xse and return the diagnostic SV.
     * When self is a defined reference the code is set on that object,
     * otherwise it is stored in last_error and a mortal SvDiag (xse) is
     * returned.  For a non-zero xse, an optional third argument holding
     * a string replaces the string value of the returned SV. */
    HV		*hv;
    csv_t	csv;

    if (!(SvOK (self) && SvROK (self))) {
	/* no object to store the code in */
	last_error = xse;
	ST (0) = sv_2mortal (SvDiag (xse));
	}
    else {
	CSV_XS_SELF;
	SetupCsv (&csv, hv, self);
	ST (0) = SetDiag (&csv, xse);
	}

    if (xse && items > 2) {
	SV *msg = ST (2);

	if (SvPOK (msg)) {
	    sv_setpvn (ST (0),  SvPVX (msg), SvCUR (msg));
	    /* switch the integer flag back on after the string assignment */
	    SvIOK_on  (ST (0));
	    }
	}

    XSRETURN (1);
    /* XS SetDiag */

void
error_input (SV *self)

  PPCODE:
    /* Return the value stored under "_ERROR_INPUT" in the object hash,
     * or a fresh undef when self is not a hash reference, the key is
     * absent, or the stored value is undefined. */
    SV	*input = NULL;

    if (self && SvOK (self) && SvROK (self) && SvTYPE (SvRV (self)) == SVt_PVHV) {
	HV  *hv  = (HV *)SvRV (self);
	SV **svp = hv_fetchs (hv, "_ERROR_INPUT", FALSE);

	/* hv_fetchs returns NULL when the key does not exist */
	if (svp && *svp && SvOK (*svp))
	    input = *svp;
	}

    /* A new SV put on the return stack has to be mortal, otherwise
     * nothing ever releases it */
    ST (0) = input ? input : sv_2mortal (newSV (0));

    XSRETURN (1);
    /* XS error_input */

void
Combine (SV *self, SV *dst, SV *fields, bool useIO)

  PPCODE:
    /* Combine the array referenced by fields into dst.
     * Returns a true value on success and undef on failure. */
    HV	*hv;
    AV	*av;
    int	 ok;

    CSV_XS_SELF;
    av = (AV *)SvRV (fields);
    ok = xsCombine (self, hv, av, dst, useIO);
    if (ok)
	ST (0) = &PL_sv_yes;
    else
	ST (0) = &PL_sv_undef;
    XSRETURN (1);
    /* XS Combine */

void
Parse (SV *self, SV *src, SV *fields, SV *fflags)

  PPCODE:
    /* Parse src (not an IO handle: useIO is 0) into the arrays referenced
     * by fields and fflags.  Returns a true or a false value. */
    HV	*hv;
    AV	*av;
    AV	*avf;
    SV	*res;

    CSV_XS_SELF;
    avf = (AV *)SvRV (fflags);
    av  = (AV *)SvRV (fields);

    if (xsParse (self, hv, av, avf, src, 0))
	res = &PL_sv_yes;
    else
	res = &PL_sv_no;

    ST (0) = res;
    XSRETURN (1);
    /* XS Parse */

void
print (SV *self, SV *io, SV *fields)

  PPCODE:
    /* Combine fields and send the result to io (useIO is 1).
     * fields is either an array reference or a literal undef, which is
     * treated as an empty field list; anything else croaks.
     * Returns a true or a false value. */
    HV	 *hv;
    AV	 *av;

    CSV_XS_SELF;
    if (fields == &PL_sv_undef)
	/* temporary array owned by nobody else: make it mortal so it is
	 * released after the call instead of being leaked */
	av = (AV *)sv_2mortal ((SV *)newAV ());
    else {
	unless (_is_arrayref (fields))
	    croak ("Expected fields to be an array ref");

	av = (AV *)SvRV (fields);
	}

    ST (0) = xsCombine (self, hv, av, io, 1) ? &PL_sv_yes : &PL_sv_no;
    XSRETURN (1);
    /* XS print */

void
getline (SV *self, SV *io)

  PPCODE:
    /* Parse the next record from io (useIO is 1).
     * Returns a mortal reference to a new array with the parsed fields,
     * or undef when xsParse reports failure. */
    HV	*hv;
    AV	*av;
    AV	*avf;

    CSV_XS_SELF;
    av  = newAV ();
    /* avf is handed to xsParse; what happens to it afterwards is not
     * visible in this block, so it is deliberately left alone here */
    avf = newAV ();
    if (xsParse (self, hv, av, avf, io, 1))
	ST (0) = sv_2mortal (newRV_noinc ((SV *)av));
    else {
	/* nothing references av on failure: release it instead of
	 * leaking one array per failed call */
	SvREFCNT_dec ((SV *)av);
	ST (0) = &PL_sv_undef;
	}
    XSRETURN (1);
    /* XS getline */

void
getline_all (SV *self, SV *io, ...)

  PPCODE:
    /* Parse records from io through xsParse_all and return its result.
     * The optional third and fourth arguments are passed on as offset
     * and length; a missing argument is passed as undef. */
    HV	*hv;
    SV  *offset = &PL_sv_undef;
    SV  *length = &PL_sv_undef;

    CSV_XS_SELF;

    if (items > 2)
	offset = ST (2);
    if (items > 3)
	length = ST (3);

    ST (0) = xsParse_all (self, hv, io, offset, length);
    XSRETURN (1);
    /* XS getline_all */

void
_cache_set (SV *self, int idx, SV *val)

  PPCODE:
    /* Forward idx and val to xs_cache_set for the hash behind self */
    HV	*hv;

    /* CSV_XS_SELF is defined elsewhere; it is expected to set hv from self */
    CSV_XS_SELF;
    xs_cache_set (hv, idx, val);
    /* ST (0) is never assigned here, so the single returned value is
     * whatever occupies that slot - presumably still self */
    XSRETURN (1);
    /* XS _cache_set */

void
_cache_diag (SV *self)

  PPCODE:
    /* Call xs_cache_diag for the hash behind self */
    HV	*hv;

    /* CSV_XS_SELF is defined elsewhere; it is expected to set hv from self */
    CSV_XS_SELF;
    xs_cache_diag (hv);
    /* ST (0) is never assigned here, so the single returned value is
     * whatever occupies that slot - presumably still self */
    XSRETURN (1);
    /* XS _cache_diag */


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.